getwd()
## [1] "/Users/victoriafield/Library/Mobile Documents/com~apple~CloudDocs/Masters Thesis/Chapter Two CSLAP Manuscript/R - data and outputs"
CSLAP<-read.csv("./raw_data/CSLAP 2012-2017 EDI Original.csv", na.strings = c("", " ", "NA", "NaN"))
str(CSLAP)
## 'data.frame': 5025 obs. of 47 variables:
## $ LakeID : chr "1005GLE0441" "1005GLE0441" "1005GLE0441" "1005GLE0441" ...
## $ Lake.Name : chr "Glen Lake" "Glen Lake" "Glen Lake" "Glen Lake" ...
## $ Proj..Code : num 8 8 8 8 8 8 8 8 8 8 ...
## $ Sample.Name : chr "16-08-02" "16-08-03" "16-08-04" "16-08-05" ...
## $ Sample.Date : chr "07/04/2016" "07/11/2016" "07/26/2016" "08/01/2016" ...
## $ Data.Provider : chr "CSL" "CSL" "CSL" "CSL" ...
## $ Info.Type : chr "OW" "OW" "OW" "OW" ...
## $ LocationID : chr "1005GLE0441_C" "1005GLE0441_C" "1005GLE0441_C" "1005GLE0441_C" ...
## $ Latitude : num 43.4 43.4 43.4 43.4 43.4 ...
## $ Longitude : num -73.7 -73.7 -73.7 -73.7 -73.7 ...
## $ Sample.Depth..m. : num 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 ...
## $ Secchi.Depth..m. : num 3.95 4 3.95 4 5.95 4.05 4.9 3.55 2.6 3.3 ...
## $ DO..mg.L. : logi NA NA NA NA NA NA ...
## $ Bottom.DO..mg.L. : logi NA NA NA NA NA NA ...
## $ pH : num 7.59 7.55 8.37 7.37 7.37 7.51 7.99 7.77 7.76 7.45 ...
## $ TP..mg.L. : num 0.0068 0.0081 0.0077 0.0061 0.0063 0.0067 0.0076 0.0086 NA NA ...
## $ TDP..mg.L. : logi NA NA NA NA NA NA ...
## $ Conductance..umho.cm. : num 424 363 451 406 400 ...
## $ Temperature..air..deg.C. : num 21 23 25 22 17 25 26 24 22 21 ...
## $ Temperature..water..deg.C. : num 24 26 26 26 19 27 26 23 25 22 ...
## $ TN..mg.L. : num 0.321 0.197 0.202 0.214 0.215 0.515 0.29 0.229 0.291 0.189 ...
## $ TDN..mg.L. : num NA NA NA NA NA NA NA NA NA NA ...
## $ Ammonia..mg.L. : num NA 0.028 NA 0.076 0.102 NA 0.0075 NA 0.022 0 ...
## $ NOx..mg.L. : num NA 0.0035 NA 0.034 0.029 NA 0.0035 NA 0.00275 NA ...
## $ Nitrate..mg.L. : logi NA NA NA NA NA NA ...
## $ Nitrite..mg.L. : num NA NA NA NA NA NA NA NA NA NA ...
## $ True.Color..ptu. : num 15 23 15 18 11 16 22 14 6 3 ...
## $ Chloride..mg.L. : num NA 51.7 NA NA NA NA 74.1 NA NA NA ...
## $ Calcium..mg.L. : num NA NA NA NA NA NA NA NA 21.3 NA ...
## $ Chl.a..probe...ug.L. : logi NA NA NA NA NA NA ...
## $ Extracted.Chl.a..ug.L. : num 0.8 1.7 1.5 0.6 1.1 1.7 1.5 3.8 13.3 3.2 ...
## $ UFI.SBK.FP.Chl.a..ug.L. : logi NA NA NA NA NA NA ...
## $ UFI.SBK.FP.BGA..ug.L. : logi NA NA NA NA NA NA ...
## $ X.UFI.SBK.Microcystin..ug.L..: logi NA NA NA NA NA NA ...
## $ ESF.FP.Chl.a..ug.L. : num NA NA NA NA NA NA NA NA 2.45 2.21 ...
## $ ESF.FP.BGA..ug.L. : num 0 0 0.36 0.2 0 0.17 0.19 0.42 0 0 ...
## $ ESF.Microcystin..ug.L. : chr "ND" "ND" "ND" "ND" ...
## $ HAB.Status : chr NA NA NA "No Bloom" ...
## $ Alkalinity.as.CaCO3..mg.L. : logi NA NA NA NA NA NA ...
## $ DOC..mg.L. : logi NA NA NA NA NA NA ...
## $ UV.254..1.cm. : logi NA NA NA NA NA NA ...
## $ Iron..ug.L. : logi NA NA NA NA NA NA ...
## $ Manganese..ug.L. : logi NA NA NA NA NA NA ...
## $ Arsenic..ug.L. : num NA NA NA NA NA NA NA NA NA NA ...
## $ QA : int 3 2 2 2 NA 2 2 2 3 2 ...
## $ QB : int 3 3 3 3 NA 4 3 3 3 3 ...
## $ QC : int 3 2 3 3 NA 2 2 3 3 2 ...
Based on output from str() and prior knowledge of the data, to clean this dataset I need to:
-Change column names and remove units
-Identify duplicate levels of Lake_Name, we know there is a duplicate because we have 73 levels of the unique factor LakeID and 72 levels of Lake_Name
-Split Sample_Date variable into two more columns: Sample_Year and Sample_Month
-For each lake, identify how many years the observations span (If less than 3, remove from dataset)
-Clean levels of Data_Provider to one level (“CSL”)
-Clean levels of Info_Type
-Remove character strings from ESF_Microcystin_ug.L
-Read in a merge values for Mean Depth, Catchment Area to Surface Area Ratio, Dreissenid Status By Year, and %Agricultural Cover
colnames(CSLAP)<-c("LakeID", "Lake_Name", "Proj_Code", "Sample_Name", "Sample_Date", "Data_Provider", "Info_Type", "LocationID", "Latitude", "Longitude", "Sample_Depth", "Secchi_Depth", "DO", "Bottom_DO","pH", "TP", "TDP", "Conductance", "Temp_Air", "Temp_Water", "TN", "TDN" , "Ammonia", "NOx", "Nitrate", "Nitrite", "True_Color", "Chloride", "Calcium", "Chl.a_FP", "Extracted_Chl.a", "UFI_SBK_FP_Chl.a", "UFI_SBK_FP_BGA", "UFI_SBK_Microcystin", "ESF_FP_Chl.a","ESF_FP_BGA", "ESF_Microcystin", "HAB_Status", "Alkalinity_CaCO3", "DOC", "UV_254", "Iron", "Manganese", "Arsenic", "QA", "QB", "QC")
Lake_Name:“Forest Lake” and remove white space from Lake_Name: “Hadlock Pond”We want the lake with LakeID:“1301FOR0441” to be called “Lake Forest” instead of “Forest Lake”. And we need to change “Hadlock Pond” to “Hadlock Pond”
CSLAP$Lake_Name<-as.character(CSLAP$Lake_Name) #first, make this column a character vector instead of factor
CSLAP[CSLAP$LakeID == "1301FOR0441", 2] <- rep("Lake Forest", 40) #then, replace the specified cells with "Lake Forest"
CSLAP[CSLAP$LakeID == "1005HAD0432", 2] <- rep("Hadlock Pond", 78) #then, replace the specified cells with "Hadlock Pond"
CSLAP$Lake_Name<-as.factor(CSLAP$Lake_Name) #last, change the column back to factor vector
Sample_Year and Sample_Month#extract sample year and month
CSLAP$Sample_Year<-as.numeric(format(as.Date(CSLAP$Sample_Date, format="%m/%d/%Y"),"%Y"))
CSLAP$Sample_Month<-as.numeric(format(as.Date(CSLAP$Sample_Date, format="%m/%d/%Y"),"%m"))
table(CSLAP$LakeID, CSLAP$Sample_Year)
##
## 2012 2013 2014 2015 2016 2017
## 0201CUB0115 16 16 16 16 16 16
## 0303LOR0009 8 8 0 8 8 8
## 0403RUS0146 0 0 0 0 16 16
## 0404LOO0079 4 8 8 16 16 16
## 0601CHE0213 17 17 16 16 17 16
## 0601GUI0188 16 16 16 13 17 16
## 0602BRA0154 13 12 16 0 19 16
## 0602CRO0073 12 13 4 13 17 14
## 0602EAR0146 22 26 3 20 18 16
## 0602EAT0163 13 12 0 16 16 16
## 0602ECH0081 12 17 17 17 0 0
## 0602GEN0088 12 14 16 18 18 0
## 0602HAT0155 15 17 17 0 16 16
## 0602PET0078 16 12 17 14 17 16
## 0602PLY0107 8 8 8 8 10 8
## 0602SON0072 18 2 23 21 20 16
## 0602TUL0068 10 12 16 17 18 16
## 0602ULI0067 9 12 0 18 17 16
## 0602WAR0094 8 13 16 16 17 16
## 0703CAZ0153 18 24 25 2 23 16
## 0703DER0139A 12 12 16 16 16 16
## 0703KAS0109 7 9 8 8 0 8
## 0703PAN0094 0 0 8 7 8 10
## 0703TUS0153A 12 14 0 16 14 16
## 0704CAN0286 0 0 0 0 1 0
## 0801BRA0689 16 16 16 0 16 16
## 0801EFF0426 0 8 8 0 0 0
## 0801OTT0926 0 8 0 8 8 8
## 0902EAG0083 9 8 0 8 11 9
## 0904SIL0351 10 8 8 9 8 8
## 0906BON0024 12 12 16 14 16 17
## 0906GRA0051 0 10 15 14 8 16
## 0906MIL0055 12 12 16 16 12 16
## 0906OTH0009 0 0 0 19 17 16
## 1004AUG0213 16 16 3 17 16 16
## 1004LIN0315 0 11 16 16 15 0
## 1004PLA0254 24 17 14 22 12 12
## 1005GLE0441 11 18 14 16 16 16
## 1005HAD0432 12 16 17 17 0 16
## 1005SUN0440 15 22 18 15 0 16
## 1102BAB1109 16 18 17 17 16 16
## 1102TAC1129 0 5 5 4 6 6
## 1104EAG0438 12 13 14 12 16 16
## 1104EFN0129 12 12 15 16 16 0
## 1104FOR0340 4 6 4 6 6 7
## 1104FRI0365 16 16 16 0 8 8
## 1104HUN0131 12 12 0 16 16 16
## 1104JEN0130 12 12 15 0 13 16
## 1104PAR0432 0 28 0 0 0 0
## 1104PLE0313 0 0 16 16 16 0
## 1104SAC0314 16 16 0 0 0 16
## 1104SCH0374 28 23 12 33 32 32
## 1201CAN0717 12 12 15 14 15 16
## 1201ECA0697 13 10 17 16 16 16
## 1201GAL0563 12 13 16 16 16 16
## 1201PEC0686 8 0 8 12 16 16
## 1201PLE0745 0 12 16 12 8 8
## 1301BOW0444 0 9 9 8 14 8
## 1301BUR0386C 0 13 9 15 16 16
## 1301FOR0441 0 8 8 7 9 8
## 1301IND0167 0 5 9 7 16 17
## 1301ROA0183B 9 8 0 16 10 10
## 1301SEP0826 0 16 13 15 16 10
## 1310QUE0057 13 12 17 19 14 16
## 1310ROU0080 0 6 7 7 6 8
## 1310SPR0081 0 0 16 16 16 16
## 1401ANA0251 12 14 16 14 16 14
## 1401DEV0174 0 0 16 16 17 12
## 1401SOM0268 24 24 17 16 17 16
## 1402YAN0021 0 8 8 6 0 8
## 1404OQU0383 16 16 16 16 14 16
## 1501TUX1007 16 0 16 17 12 14
## 1701LLO0708 8 8 8 7 8 8
CSLAP$LakeID<-as.character(CSLAP$LakeID) #first, change to a character vector
CSLAP<-CSLAP[CSLAP$LakeID != "0403RUS0146", ]
CSLAP<-CSLAP[CSLAP$LakeID != "0704CAN0286", ]
CSLAP<-CSLAP[CSLAP$LakeID != "0801EFF0426", ]
CSLAP<-CSLAP[CSLAP$LakeID != "1104PAR0432", ]
CSLAP<-CSLAP[CSLAP$LakeID != "0602PLY0107", ] #Plymouth reservoir will also be removed because it is eutrophic
CSLAP$LakeID<-as.factor(CSLAP$LakeID)
CSLAP$LakeID<-droplevels(CSLAP$LakeID)
levels(CSLAP$LakeID)
## [1] "0201CUB0115" "0303LOR0009" "0404LOO0079" "0601CHE0213" "0601GUI0188"
## [6] "0602BRA0154" "0602CRO0073" "0602EAR0146" "0602EAT0163" "0602ECH0081"
## [11] "0602GEN0088" "0602HAT0155" "0602PET0078" "0602SON0072" "0602TUL0068"
## [16] "0602ULI0067" "0602WAR0094" "0703CAZ0153" "0703DER0139A" "0703KAS0109"
## [21] "0703PAN0094" "0703TUS0153A" "0801BRA0689" "0801OTT0926" "0902EAG0083"
## [26] "0904SIL0351" "0906BON0024" "0906GRA0051" "0906MIL0055" "0906OTH0009"
## [31] "1004AUG0213" "1004LIN0315" "1004PLA0254" "1005GLE0441" "1005HAD0432"
## [36] "1005SUN0440" "1102BAB1109" "1102TAC1129" "1104EAG0438" "1104EFN0129"
## [41] "1104FOR0340" "1104FRI0365" "1104HUN0131" "1104JEN0130" "1104PLE0313"
## [46] "1104SAC0314" "1104SCH0374" "1201CAN0717" "1201ECA0697" "1201GAL0563"
## [51] "1201PEC0686" "1201PLE0745" "1301BOW0444" "1301BUR0386C" "1301FOR0441"
## [56] "1301IND0167" "1301ROA0183B" "1301SEP0826" "1310QUE0057" "1310ROU0080"
## [61] "1310SPR0081" "1401ANA0251" "1401DEV0174" "1401SOM0268" "1402YAN0021"
## [66] "1404OQU0383" "1501TUX1007" "1701LLO0708"
Data_Providerlevels(CSLAP$Data_Provider)
## NULL
CSLAP$Data_Provider<-as.character(CSLAP$Data_Provider)
CSLAP[CSLAP$Data_Provider == "csl", 6] <- "CSL"
CSLAP$Data_Provider<-as.factor(CSLAP$Data_Provider)
CSLAP$Data_Provider<-droplevels(CSLAP$Data_Provider)
levels(CSLAP$Data_Provider)
## [1] "CSL"
Info_Typelevels(CSLAP$Info_Type)
## NULL
CSLAP$Info_Type<-as.character(CSLAP$Info_Type)
CSLAP[CSLAP$Info_Type == "ow", 7] <- "OW"
CSLAP[CSLAP$Info_Type == "sb", 7] <- "SB"
CSLAP$Info_Type<-as.factor(CSLAP$Info_Type)
CSLAP$Info_Type<-droplevels(CSLAP$Info_Type)
levels(CSLAP$Info_Type)
## [1] "BS" "OW" "RT" "SB"
ESF_Microcystin vector# ESF_Microcystin should be a numeric column,but it's listed as factor. We need to remove character values from this column
CSLAP$ESF_Microcystin[CSLAP$ESF_Microcystin == "ND"] <- NA
CSLAP$ESF_Microcystin[CSLAP$ESF_Microcystin == "no filter"] <- NA
CSLAP$ESF_Microcystin <- as.numeric(as.character(CSLAP$ESF_Microcystin))
str(CSLAP$ESF_Microcystin)
## num [1:4898] NA NA NA NA NA NA NA NA NA NA ...
CSLAP$TN_TP<-CSLAP$TN/CSLAP$TP
result <- lapply(CSLAP, identifyMissing)
result
## $LakeID
## No problems found.
## $Lake_Name
## No problems found.
## $Proj_Code
## No problems found.
## $Sample_Name
## No problems found.
## $Sample_Date
## No problems found.
## $Data_Provider
## No problems found.
## $Info_Type
## No problems found.
## $LocationID
## No problems found.
## $Latitude
## No problems found.
## $Longitude
## No problems found.
## $Sample_Depth
## No problems found.
## $Secchi_Depth
## No problems found.
## $DO
## No problems found.
## $Bottom_DO
## No problems found.
## $pH
## No problems found.
## $TP
## No problems found.
## $TDP
## No problems found.
## $Conductance
## No problems found.
## $Temp_Air
## No problems found.
## $Temp_Water
## No problems found.
## $TN
## No problems found.
## $TDN
## No problems found.
## $Ammonia
## No problems found.
## $NOx
## No problems found.
## $Nitrate
## No problems found.
## $Nitrite
## No problems found.
## $True_Color
## No problems found.
## $Chloride
## No problems found.
## $Calcium
## No problems found.
## $Chl.a_FP
## No problems found.
## $Extracted_Chl.a
## No problems found.
## $UFI_SBK_FP_Chl.a
## No problems found.
## $UFI_SBK_FP_BGA
## No problems found.
## $UFI_SBK_Microcystin
## No problems found.
## $ESF_FP_Chl.a
## No problems found.
## $ESF_FP_BGA
## No problems found.
## $ESF_Microcystin
## No problems found.
## $HAB_Status
## No problems found.
## $Alkalinity_CaCO3
## No problems found.
## $DOC
## No problems found.
## $UV_254
## No problems found.
## $Iron
## No problems found.
## $Manganese
## No problems found.
## $Arsenic
## No problems found.
## $QA
## No problems found.
## $QB
## No problems found.
## $QC
## No problems found.
## $Sample_Year
## No problems found.
## $Sample_Month
## No problems found.
## $TN_TP
## The following suspected missing value codes enter as regular values: Inf, NaN.
CSLAP$TN_TP[CSLAP$TN_TP == "Inf"] <- NA
CSLAP$TN_TP[CSLAP$TN_TP == "NaN"] <- NA
result <- lapply(CSLAP, identifyMissing)
result
## $LakeID
## No problems found.
## $Lake_Name
## No problems found.
## $Proj_Code
## No problems found.
## $Sample_Name
## No problems found.
## $Sample_Date
## No problems found.
## $Data_Provider
## No problems found.
## $Info_Type
## No problems found.
## $LocationID
## No problems found.
## $Latitude
## No problems found.
## $Longitude
## No problems found.
## $Sample_Depth
## No problems found.
## $Secchi_Depth
## No problems found.
## $DO
## No problems found.
## $Bottom_DO
## No problems found.
## $pH
## No problems found.
## $TP
## No problems found.
## $TDP
## No problems found.
## $Conductance
## No problems found.
## $Temp_Air
## No problems found.
## $Temp_Water
## No problems found.
## $TN
## No problems found.
## $TDN
## No problems found.
## $Ammonia
## No problems found.
## $NOx
## No problems found.
## $Nitrate
## No problems found.
## $Nitrite
## No problems found.
## $True_Color
## No problems found.
## $Chloride
## No problems found.
## $Calcium
## No problems found.
## $Chl.a_FP
## No problems found.
## $Extracted_Chl.a
## No problems found.
## $UFI_SBK_FP_Chl.a
## No problems found.
## $UFI_SBK_FP_BGA
## No problems found.
## $UFI_SBK_Microcystin
## No problems found.
## $ESF_FP_Chl.a
## No problems found.
## $ESF_FP_BGA
## No problems found.
## $ESF_Microcystin
## No problems found.
## $HAB_Status
## No problems found.
## $Alkalinity_CaCO3
## No problems found.
## $DOC
## No problems found.
## $UV_254
## No problems found.
## $Iron
## No problems found.
## $Manganese
## No problems found.
## $Arsenic
## No problems found.
## $QA
## No problems found.
## $QB
## No problems found.
## $QC
## No problems found.
## $Sample_Year
## No problems found.
## $Sample_Month
## No problems found.
## $TN_TP
## No problems found.
Dreissenids<-read.csv("./raw_data/Dreissenid_Status_byYear.csv", header=TRUE)
Dreissenids$Lake_Name<-as.character(Dreissenids$Lake_Name)
Dreissenids[200:204,2] <- rep("Hadlock Pond",5)
Dreissenids$Lake_Name<-as.factor(Dreissenids$Lake_Name)
CASA_MD<-read.csv("./raw_data/CASA_Mean_Depth.csv", header=TRUE)
CASA_MD$Lake_Name<-as.character(CASA_MD$Lake_Name)
CASA_MD[27,2] <- "Hadlock Pond"
CASA_MD$Lake_Name<-as.factor(CASA_MD$Lake_Name)
Ag<-read.csv("./raw_data/Percent Ag Cover.csv", header=TRUE)
Ag$Lake_Name<-as.character(Ag$Lake_Name)
Ag[27,1] <- "Hadlock Pond"
Ag$Lake_Name<-as.factor(Ag$Lake_Name)
CSLAP<-merge(CSLAP, Dreissenids, by=c("LakeID", "Lake_Name", "Sample_Year"), all.x =TRUE)
CSLAP<-merge(CSLAP, CASA_MD, by=c("LakeID", "Lake_Name"), all.x=TRUE)
CSLAP<-merge(CSLAP, Ag, by="Lake_Name", all.x=TRUE)
Now we have a clean, full dataset. Next, we need to create the reduced dataset using propensity score matching
Lake_List<-read.csv("./raw_data/LakeList.csv", header=TRUE)
Lake_List<-merge(Lake_List, Ag, by="Lake_Name", na.rm=FALSE, all.x=TRUE) #merge %ag
Lake_List<-Lake_List[Lake_List$LakeID != "0403RUS0146",] #Remove Lake Rushford
m_ps <- glm(Dreissenids ~ Mean_Depth_m + CA.SA + Percent_Ag,
family = binomial(), data = Lake_List)
summary(m_ps)
##
## Call:
## glm(formula = Dreissenids ~ Mean_Depth_m + CA.SA + Percent_Ag,
## family = binomial(), data = Lake_List)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2155 -0.4089 -0.3318 -0.3144 2.4219
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.113660 0.954012 -3.264 0.00110 **
## Mean_Depth_m 0.019552 0.131026 0.149 0.88138
## CA.SA 0.005799 0.010392 0.558 0.57684
## Percent_Ag 0.068900 0.026159 2.634 0.00844 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 49.008 on 66 degrees of freedom
## Residual deviance: 40.860 on 63 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 48.86
##
## Number of Fisher Scoring iterations: 5
prs_df <- data.frame(pr_score = predict(m_ps, type = "response"),
Dreissenids = m_ps$model$Dreissenids)
head(prs_df)
## pr_score Dreissenids
## 1 0.04688800 0
## 2 0.04809539 0
## 3 0.05508699 0
## 4 0.16993506 0
## 5 0.05479752 0
## 6 0.07388909 0
labs <- paste("Invasion Status:", c("Invaded", "Uninvaded"))
prs_df %>%
mutate(Dreissenids = ifelse(Dreissenids == 1, labs[1], labs[2])) %>%
ggplot(aes(x = pr_score)) +
geom_histogram(color = "white") +
facet_wrap(~Dreissenids) +
xlab("Probability of being invaded") +
theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
lakes_nomiss <- Lake_List %>% # MatchIt does not allow missing values
dplyr::select(CA.SA, Mean_Depth_m, Percent_Ag, LakeID, Lake_Name, Dreissenids) %>%
na.omit()
mod_match <- matchit(Dreissenids ~ CA.SA + Mean_Depth_m + Percent_Ag,
method = "nearest", data = lakes_nomiss)
df_match <- match.data(mod_match)
redLakes<-df_match[,c(4,6)]
redCSLAP<-merge(CSLAP, redLakes, by="LakeID")
redCSLAP<-redCSLAP[,1:54] #removed redundant Dreissenids column
colnames(redCSLAP)[51] <- "Dreissenids"
#melt data frame by Dreissenids and Info_type so we can summarize all our variables of interest in one code chunk
CSLAP2<-CSLAP[, c(8, 13, 17, 21, 22, 28, 32, 36, 37, 38, 50:54)]
meltCSLAP <- melt(CSLAP2, id=c("Dreissenids", "Info_Type"), na.rm=TRUE)
#summarize each variable split by invasion status and info
VariableSummaries<-ddply(meltCSLAP, c("Dreissenids", "Info_Type", "variable"), summarize,
Mean = mean(value, na.rm=TRUE),
Median = median(value, na.rm=TRUE),
Min = min(value, na.rm=TRUE),
Max = max(value, na.rm=TRUE),
SD = sd(value, na.rm=TRUE))
VariableSummaries
## Dreissenids Info_Type variable Mean Median Min
## 1 Invaded BS TP 3.945294e-02 0.019850 0.0000000
## 2 Invaded BS Temp_Water 1.511196e+01 15.000000 5.0000000
## 3 Invaded BS CA.SA 2.788857e+01 6.507353 4.4873502
## 4 Invaded BS Mean_Depth_m 6.232740e+00 6.200000 3.7000000
## 5 Invaded BS Percent_Ag 2.046619e+01 20.000000 1.0000000
## 6 Invaded OW Secchi_Depth 4.536559e+00 4.400000 1.1000000
## 7 Invaded OW TP 1.276286e-02 0.011000 0.0036000
## 8 Invaded OW Temp_Water 2.330388e+01 24.000000 10.0000000
## 9 Invaded OW TN 5.232666e-01 0.368810 0.0000000
## 10 Invaded OW True_Color 1.295292e+01 11.000000 0.5000000
## 11 Invaded OW Extracted_Chl.a 2.372222e+00 1.900000 0.0500000
## 12 Invaded OW ESF_FP_Chl.a 1.945223e+00 1.600000 0.0000000
## 13 Invaded OW ESF_FP_BGA 4.813312e-01 0.100000 0.0000000
## 14 Invaded OW ESF_Microcystin 6.437536e-01 0.440000 0.3285191
## 15 Invaded OW TN_TP 4.676542e+01 31.182701 0.0000000
## 16 Invaded OW CA.SA 2.823056e+01 10.780370 4.4873502
## 17 Invaded OW Mean_Depth_m 6.233231e+00 6.200000 3.7000000
## 18 Invaded OW Percent_Ag 1.934462e+01 20.000000 1.0000000
## 19 Invaded SB Secchi_Depth 2.750000e+00 2.750000 2.7500000
## 20 Invaded SB ESF_FP_Chl.a 2.138201e+03 142.635000 0.2000000
## 21 Invaded SB ESF_FP_BGA 2.002831e+03 85.475000 0.0000000
## 22 Invaded SB ESF_Microcystin 7.493377e+01 43.840108 0.8346926
## 23 Invaded SB CA.SA 1.424389e+01 4.807692 4.6535893
## 24 Invaded SB Mean_Depth_m 5.713333e+00 7.100000 3.7000000
## 25 Invaded SB Percent_Ag 3.256667e+01 21.500000 20.0000000
## 26 Uninvaded BS Secchi_Depth 5.383333e+00 5.350000 5.1000000
## 27 Uninvaded BS TP 5.579272e-02 0.015000 0.0000000
## 28 Uninvaded BS Temp_Water 1.347582e+01 13.000000 3.0000000
## 29 Uninvaded BS TN 3.655000e-01 0.365500 0.3580000
## 30 Uninvaded BS True_Color 1.300000e+01 13.000000 9.0000000
## 31 Uninvaded BS Extracted_Chl.a 1.600000e+00 1.600000 1.6000000
## 32 Uninvaded BS ESF_FP_Chl.a 5.300000e-01 0.570000 0.3900000
## 33 Uninvaded BS ESF_FP_BGA 0.000000e+00 0.000000 0.0000000
## 34 Uninvaded BS TN_TP 1.234775e+01 12.347749 8.1978022
## 35 Uninvaded BS CA.SA 1.816096e+01 7.732739 0.7403433
## 36 Uninvaded BS Mean_Depth_m 7.083978e+00 5.500000 2.6000000
## 37 Uninvaded BS Percent_Ag 8.507919e+00 1.000000 0.0000000
## 38 Uninvaded OW Secchi_Depth 4.075063e+00 3.800000 1.0000000
## 39 Uninvaded OW TP 1.207152e-02 0.009700 0.0000000
## 40 Uninvaded OW Temp_Water 2.271985e+01 23.000000 7.0000000
## 41 Uninvaded OW TN 3.831279e-01 0.325000 0.0000000
## 42 Uninvaded OW True_Color 1.578847e+01 13.000000 0.5000000
## 43 Uninvaded OW Extracted_Chl.a 3.377997e+00 2.400000 0.0500000
## 44 Uninvaded OW ESF_FP_Chl.a 1.007138e+01 1.700000 0.0000000
## 45 Uninvaded OW ESF_FP_BGA 2.181487e+00 0.090000 0.0000000
## 46 Uninvaded OW ESF_Microcystin 1.188658e+00 0.407566 0.1600000
## 47 Uninvaded OW TN_TP 4.209054e+01 32.444444 1.0840999
## 48 Uninvaded OW CA.SA 1.883358e+01 8.945022 0.7403433
## 49 Uninvaded OW Mean_Depth_m 5.966943e+00 4.700000 0.4000000
## 50 Uninvaded OW Percent_Ag 6.657864e+00 1.000000 0.0000000
## 51 Uninvaded RT CA.SA 8.140788e+01 81.407877 81.4078774
## 52 Uninvaded RT Mean_Depth_m 1.700000e+01 17.000000 17.0000000
## 53 Uninvaded RT Percent_Ag 1.000000e+00 1.000000 1.0000000
## 54 Uninvaded SB Secchi_Depth 3.600000e+00 2.500000 1.7000000
## 55 Uninvaded SB ESF_FP_Chl.a 2.791198e+03 171.710000 0.2800000
## 56 Uninvaded SB ESF_FP_BGA 2.309770e+03 26.870000 0.0000000
## 57 Uninvaded SB ESF_Microcystin 2.086218e+03 100.355000 0.8100000
## 58 Uninvaded SB CA.SA 9.269072e+00 6.057692 0.7403433
## 59 Uninvaded SB Mean_Depth_m 5.083735e+00 4.600000 0.4000000
## 60 Uninvaded SB Percent_Ag 9.583851e+00 4.000000 0.0000000
## Max SD
## 1 0.37420 5.324365e-02
## 2 25.00000 5.298994e+00
## 3 189.85507 5.436477e+01
## 4 9.90000 1.727590e+00
## 5 48.00000 1.550414e+01
## 6 9.10000 1.395980e+00
## 7 0.06640 7.093941e-03
## 8 36.00000 2.860583e+00
## 9 4.12400 4.399775e-01
## 10 55.00000 9.245752e+00
## 11 13.30000 1.839573e+00
## 12 12.15000 1.634573e+00
## 13 10.36000 9.835983e-01
## 14 1.84000 5.345261e-01
## 15 240.59524 4.110511e+01
## 16 189.85507 5.391221e+01
## 17 9.90000 1.697457e+00
## 18 48.00000 1.529962e+01
## 19 2.75000 NA
## 20 31507.50000 6.226951e+03
## 21 31507.50000 6.221888e+03
## 22 361.24104 9.244751e+01
## 23 189.85507 4.064373e+01
## 24 9.90000 1.901513e+00
## 25 48.00000 1.359881e+01
## 26 5.70000 3.013857e-01
## 27 3.90960 1.657730e-01
## 28 29.00000 5.807561e+00
## 29 0.37300 1.060660e-02
## 30 17.00000 4.000000e+00
## 31 1.60000 NA
## 32 0.63000 1.249000e-01
## 33 0.00000 0.000000e+00
## 34 16.49770 5.868911e+00
## 35 210.42471 3.484989e+01
## 36 21.30000 4.142202e+00
## 37 44.00000 1.271529e+01
## 38 13.60000 1.697343e+00
## 39 0.21590 1.094758e-02
## 40 34.00000 3.428063e+00
## 41 12.93200 4.134485e-01
## 42 100.00000 1.073302e+01
## 43 63.20000 3.682886e+00
## 44 3438.50000 1.476402e+02
## 45 2564.00000 5.420867e+01
## 46 52.86975 5.577302e+00
## 47 7936.00000 1.705462e+02
## 48 210.42471 3.385397e+01
## 49 21.30000 4.112813e+00
## 50 44.00000 1.106120e+01
## 51 81.40788 NA
## 52 17.00000 NA
## 53 1.00000 NA
## 54 6.60000 2.628688e+00
## 55 74781.25000 9.683792e+03
## 56 74781.25000 9.173332e+03
## 57 39757.12502 7.757750e+03
## 58 54.94505 7.784528e+00
## 59 13.00000 2.380611e+00
## 60 41.00000 1.102359e+01
#I will pull out only the values that I need to make copying to a word document "easy"
VariableSummaries[c(1, 6:11,15:18, 20:22, 27, 38:34, 47:50, 55:57),]
## Dreissenids Info_Type variable Mean Median Min
## 1 Invaded BS TP 3.945294e-02 0.019850 0.0000000
## 6 Invaded OW Secchi_Depth 4.536559e+00 4.400000 1.1000000
## 7 Invaded OW TP 1.276286e-02 0.011000 0.0036000
## 8 Invaded OW Temp_Water 2.330388e+01 24.000000 10.0000000
## 9 Invaded OW TN 5.232666e-01 0.368810 0.0000000
## 10 Invaded OW True_Color 1.295292e+01 11.000000 0.5000000
## 11 Invaded OW Extracted_Chl.a 2.372222e+00 1.900000 0.0500000
## 15 Invaded OW TN_TP 4.676542e+01 31.182701 0.0000000
## 16 Invaded OW CA.SA 2.823056e+01 10.780370 4.4873502
## 17 Invaded OW Mean_Depth_m 6.233231e+00 6.200000 3.7000000
## 18 Invaded OW Percent_Ag 1.934462e+01 20.000000 1.0000000
## 20 Invaded SB ESF_FP_Chl.a 2.138201e+03 142.635000 0.2000000
## 21 Invaded SB ESF_FP_BGA 2.002831e+03 85.475000 0.0000000
## 22 Invaded SB ESF_Microcystin 7.493377e+01 43.840108 0.8346926
## 27 Uninvaded BS TP 5.579272e-02 0.015000 0.0000000
## 38 Uninvaded OW Secchi_Depth 4.075063e+00 3.800000 1.0000000
## 37 Uninvaded BS Percent_Ag 8.507919e+00 1.000000 0.0000000
## 36 Uninvaded BS Mean_Depth_m 7.083978e+00 5.500000 2.6000000
## 35 Uninvaded BS CA.SA 1.816096e+01 7.732739 0.7403433
## 34 Uninvaded BS TN_TP 1.234775e+01 12.347749 8.1978022
## 47 Uninvaded OW TN_TP 4.209054e+01 32.444444 1.0840999
## 48 Uninvaded OW CA.SA 1.883358e+01 8.945022 0.7403433
## 49 Uninvaded OW Mean_Depth_m 5.966943e+00 4.700000 0.4000000
## 50 Uninvaded OW Percent_Ag 6.657864e+00 1.000000 0.0000000
## 55 Uninvaded SB ESF_FP_Chl.a 2.791198e+03 171.710000 0.2800000
## 56 Uninvaded SB ESF_FP_BGA 2.309770e+03 26.870000 0.0000000
## 57 Uninvaded SB ESF_Microcystin 2.086218e+03 100.355000 0.8100000
## Max SD
## 1 0.3742 5.324365e-02
## 6 9.1000 1.395980e+00
## 7 0.0664 7.093941e-03
## 8 36.0000 2.860583e+00
## 9 4.1240 4.399775e-01
## 10 55.0000 9.245752e+00
## 11 13.3000 1.839573e+00
## 15 240.5952 4.110511e+01
## 16 189.8551 5.391221e+01
## 17 9.9000 1.697457e+00
## 18 48.0000 1.529962e+01
## 20 31507.5000 6.226951e+03
## 21 31507.5000 6.221888e+03
## 22 361.2410 9.244751e+01
## 27 3.9096 1.657730e-01
## 38 13.6000 1.697343e+00
## 37 44.0000 1.271529e+01
## 36 21.3000 4.142202e+00
## 35 210.4247 3.484989e+01
## 34 16.4977 5.868911e+00
## 47 7936.0000 1.705462e+02
## 48 210.4247 3.385397e+01
## 49 21.3000 4.112813e+00
## 50 44.0000 1.106120e+01
## 55 74781.2500 9.683792e+03
## 56 74781.2500 9.173332e+03
## 57 39757.1250 7.757750e+03
#Write this dataframe to a csv so the values can be easily put into a word document
write.csv(VariableSummaries, "./output_data/VariableSummaries-June2020.csv")
First we will split our dataset based on information type
OWCSLAP<-CSLAP[CSLAP$Info_Type == "OW",]
BSCSLAP<-CSLAP[CSLAP$Info_Type == "BS",]
SBCSLAP<-CSLAP[CSLAP$Info_Type == "SB",]
redOWCSLAP<-redCSLAP[redCSLAP$Info_Type == "OW",]
redBSCSLAP<-redCSLAP[redCSLAP$Info_Type == "BS",]
redBSCSLAP<-redCSLAP[redCSLAP$Info_Type == "SB",]
ggplot(OWCSLAP, aes(x=TP)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Open water TP", x="TP (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 49 rows containing non-finite values (stat_bin).
## Warning: Removed 49 rows containing non-finite values (stat_density).
ggplot(OWCSLAP, aes(x=TN)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Open water TN", x="TN (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 66 rows containing non-finite values (stat_bin).
## Warning: Removed 66 rows containing non-finite values (stat_density).
ggplot(OWCSLAP, aes(x=TN_TP)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Open water TN:TP", x="TN:TP", y="Frequency")+
scale_x_continuous(limits=c(0,100))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 180 rows containing non-finite values (stat_bin).
## Warning: Removed 180 rows containing non-finite values (stat_density).
## Warning: Removed 4 rows containing missing values (geom_bar).
ggplot(OWCSLAP, aes(x=Extracted_Chl.a)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Open water chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 114 rows containing non-finite values (stat_bin).
## Warning: Removed 114 rows containing non-finite values (stat_density).
ggplot(OWCSLAP, aes(x=Secchi_Depth)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Secchi Depth", x="Secchi depth (m)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 114 rows containing non-finite values (stat_bin).
## Warning: Removed 114 rows containing non-finite values (stat_density).
ggplot(OWCSLAP, aes(x=True_Color)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="True Color", x="True color (PTU)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 95 rows containing non-finite values (stat_bin).
## Warning: Removed 95 rows containing non-finite values (stat_density).
ggplot(SBCSLAP, aes(x=ESF_FP_Chl.a)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Shoreline Bloom Chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing non-finite values (stat_density).
ggplot(SBCSLAP, aes(x=ESF_Microcystin)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Shoreline Bloom Microcystin", x="Microcystin (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 180 rows containing non-finite values (stat_bin).
## Warning: Removed 180 rows containing non-finite values (stat_density).
#Quickly change factor level labels for Dreissenids
Lake_List[Lake_List$Dreissenids == 0, 5] <- rep("Uninvaded", 60)
Lake_List[Lake_List$Dreissenids == 1, 5] <- rep("Invaded", 8)
ggplot(Lake_List, aes(x=Mean_Depth_m)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Mean Depth", x="Mean depth (m)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(Lake_List, aes(x=CA.SA)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Catchment Area to Surface Area Ratio", x="CA:SA", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(Lake_List, aes(x=Percent_Ag)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Percent Agricultural Cover in Watershed", x="Agriculture (%)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: Removed 1 rows containing non-finite values (stat_density).
#melt data frame by Dreissenids and Info_type so we can summarize all our variables of interest in one code chunk
redCSLAP2<-redCSLAP[, c(8, 13, 17, 21, 22, 28, 32, 36, 37, 38, 50:54)]
redmeltCSLAP <- melt(redCSLAP2, id=c("Dreissenids", "Info_Type"), na.rm=TRUE)
#summarize each variable split by invasion status and info
redVariableSummaries<-ddply(meltCSLAP, c("Dreissenids", "Info_Type", "variable"), summarize,
Mean = mean(value, na.rm=TRUE),
Median = median(value, na.rm=TRUE),
Min = min(value, na.rm=TRUE),
Max = max(value, na.rm=TRUE),
SD = sd(value, na.rm=TRUE))
redVariableSummaries
## Dreissenids Info_Type variable Mean Median Min
## 1 Invaded BS TP 3.945294e-02 0.019850 0.0000000
## 2 Invaded BS Temp_Water 1.511196e+01 15.000000 5.0000000
## 3 Invaded BS CA.SA 2.788857e+01 6.507353 4.4873502
## 4 Invaded BS Mean_Depth_m 6.232740e+00 6.200000 3.7000000
## 5 Invaded BS Percent_Ag 2.046619e+01 20.000000 1.0000000
## 6 Invaded OW Secchi_Depth 4.536559e+00 4.400000 1.1000000
## 7 Invaded OW TP 1.276286e-02 0.011000 0.0036000
## 8 Invaded OW Temp_Water 2.330388e+01 24.000000 10.0000000
## 9 Invaded OW TN 5.232666e-01 0.368810 0.0000000
## 10 Invaded OW True_Color 1.295292e+01 11.000000 0.5000000
## 11 Invaded OW Extracted_Chl.a 2.372222e+00 1.900000 0.0500000
## 12 Invaded OW ESF_FP_Chl.a 1.945223e+00 1.600000 0.0000000
## 13 Invaded OW ESF_FP_BGA 4.813312e-01 0.100000 0.0000000
## 14 Invaded OW ESF_Microcystin 6.437536e-01 0.440000 0.3285191
## 15 Invaded OW TN_TP 4.676542e+01 31.182701 0.0000000
## 16 Invaded OW CA.SA 2.823056e+01 10.780370 4.4873502
## 17 Invaded OW Mean_Depth_m 6.233231e+00 6.200000 3.7000000
## 18 Invaded OW Percent_Ag 1.934462e+01 20.000000 1.0000000
## 19 Invaded SB Secchi_Depth 2.750000e+00 2.750000 2.7500000
## 20 Invaded SB ESF_FP_Chl.a 2.138201e+03 142.635000 0.2000000
## 21 Invaded SB ESF_FP_BGA 2.002831e+03 85.475000 0.0000000
## 22 Invaded SB ESF_Microcystin 7.493377e+01 43.840108 0.8346926
## 23 Invaded SB CA.SA 1.424389e+01 4.807692 4.6535893
## 24 Invaded SB Mean_Depth_m 5.713333e+00 7.100000 3.7000000
## 25 Invaded SB Percent_Ag 3.256667e+01 21.500000 20.0000000
## 26 Uninvaded BS Secchi_Depth 5.383333e+00 5.350000 5.1000000
## 27 Uninvaded BS TP 5.579272e-02 0.015000 0.0000000
## 28 Uninvaded BS Temp_Water 1.347582e+01 13.000000 3.0000000
## 29 Uninvaded BS TN 3.655000e-01 0.365500 0.3580000
## 30 Uninvaded BS True_Color 1.300000e+01 13.000000 9.0000000
## 31 Uninvaded BS Extracted_Chl.a 1.600000e+00 1.600000 1.6000000
## 32 Uninvaded BS ESF_FP_Chl.a 5.300000e-01 0.570000 0.3900000
## 33 Uninvaded BS ESF_FP_BGA 0.000000e+00 0.000000 0.0000000
## 34 Uninvaded BS TN_TP 1.234775e+01 12.347749 8.1978022
## 35 Uninvaded BS CA.SA 1.816096e+01 7.732739 0.7403433
## 36 Uninvaded BS Mean_Depth_m 7.083978e+00 5.500000 2.6000000
## 37 Uninvaded BS Percent_Ag 8.507919e+00 1.000000 0.0000000
## 38 Uninvaded OW Secchi_Depth 4.075063e+00 3.800000 1.0000000
## 39 Uninvaded OW TP 1.207152e-02 0.009700 0.0000000
## 40 Uninvaded OW Temp_Water 2.271985e+01 23.000000 7.0000000
## 41 Uninvaded OW TN 3.831279e-01 0.325000 0.0000000
## 42 Uninvaded OW True_Color 1.578847e+01 13.000000 0.5000000
## 43 Uninvaded OW Extracted_Chl.a 3.377997e+00 2.400000 0.0500000
## 44 Uninvaded OW ESF_FP_Chl.a 1.007138e+01 1.700000 0.0000000
## 45 Uninvaded OW ESF_FP_BGA 2.181487e+00 0.090000 0.0000000
## 46 Uninvaded OW ESF_Microcystin 1.188658e+00 0.407566 0.1600000
## 47 Uninvaded OW TN_TP 4.209054e+01 32.444444 1.0840999
## 48 Uninvaded OW CA.SA 1.883358e+01 8.945022 0.7403433
## 49 Uninvaded OW Mean_Depth_m 5.966943e+00 4.700000 0.4000000
## 50 Uninvaded OW Percent_Ag 6.657864e+00 1.000000 0.0000000
## 51 Uninvaded RT CA.SA 8.140788e+01 81.407877 81.4078774
## 52 Uninvaded RT Mean_Depth_m 1.700000e+01 17.000000 17.0000000
## 53 Uninvaded RT Percent_Ag 1.000000e+00 1.000000 1.0000000
## 54 Uninvaded SB Secchi_Depth 3.600000e+00 2.500000 1.7000000
## 55 Uninvaded SB ESF_FP_Chl.a 2.791198e+03 171.710000 0.2800000
## 56 Uninvaded SB ESF_FP_BGA 2.309770e+03 26.870000 0.0000000
## 57 Uninvaded SB ESF_Microcystin 2.086218e+03 100.355000 0.8100000
## 58 Uninvaded SB CA.SA 9.269072e+00 6.057692 0.7403433
## 59 Uninvaded SB Mean_Depth_m 5.083735e+00 4.600000 0.4000000
## 60 Uninvaded SB Percent_Ag 9.583851e+00 4.000000 0.0000000
## Max SD
## 1 0.37420 5.324365e-02
## 2 25.00000 5.298994e+00
## 3 189.85507 5.436477e+01
## 4 9.90000 1.727590e+00
## 5 48.00000 1.550414e+01
## 6 9.10000 1.395980e+00
## 7 0.06640 7.093941e-03
## 8 36.00000 2.860583e+00
## 9 4.12400 4.399775e-01
## 10 55.00000 9.245752e+00
## 11 13.30000 1.839573e+00
## 12 12.15000 1.634573e+00
## 13 10.36000 9.835983e-01
## 14 1.84000 5.345261e-01
## 15 240.59524 4.110511e+01
## 16 189.85507 5.391221e+01
## 17 9.90000 1.697457e+00
## 18 48.00000 1.529962e+01
## 19 2.75000 NA
## 20 31507.50000 6.226951e+03
## 21 31507.50000 6.221888e+03
## 22 361.24104 9.244751e+01
## 23 189.85507 4.064373e+01
## 24 9.90000 1.901513e+00
## 25 48.00000 1.359881e+01
## 26 5.70000 3.013857e-01
## 27 3.90960 1.657730e-01
## 28 29.00000 5.807561e+00
## 29 0.37300 1.060660e-02
## 30 17.00000 4.000000e+00
## 31 1.60000 NA
## 32 0.63000 1.249000e-01
## 33 0.00000 0.000000e+00
## 34 16.49770 5.868911e+00
## 35 210.42471 3.484989e+01
## 36 21.30000 4.142202e+00
## 37 44.00000 1.271529e+01
## 38 13.60000 1.697343e+00
## 39 0.21590 1.094758e-02
## 40 34.00000 3.428063e+00
## 41 12.93200 4.134485e-01
## 42 100.00000 1.073302e+01
## 43 63.20000 3.682886e+00
## 44 3438.50000 1.476402e+02
## 45 2564.00000 5.420867e+01
## 46 52.86975 5.577302e+00
## 47 7936.00000 1.705462e+02
## 48 210.42471 3.385397e+01
## 49 21.30000 4.112813e+00
## 50 44.00000 1.106120e+01
## 51 81.40788 NA
## 52 17.00000 NA
## 53 1.00000 NA
## 54 6.60000 2.628688e+00
## 55 74781.25000 9.683792e+03
## 56 74781.25000 9.173332e+03
## 57 39757.12502 7.757750e+03
## 58 54.94505 7.784528e+00
## 59 13.00000 2.380611e+00
## 60 41.00000 1.102359e+01
#Write this dataframe to a csv so the values can be easily put into a word document
write.csv(redVariableSummaries, "./output_data/redVariableSummaries-June2020.csv")
First we will split our dataset based on information type
redOWCSLAP<-redCSLAP[redCSLAP$Info_Type == "OW",]
redBSCSLAP<-redCSLAP[redCSLAP$Info_Type == "BS",]
redSBCSLAP<-redCSLAP[redCSLAP$Info_Type == "SB",]
ggplot(redOWCSLAP, aes(x=TP)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Open water TP", x="TP (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 13 rows containing non-finite values (stat_bin).
## Warning: Removed 13 rows containing non-finite values (stat_density).
ggplot(redOWCSLAP, aes(x=TN)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Open water TN", x="TN (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 20 rows containing non-finite values (stat_bin).
## Warning: Removed 20 rows containing non-finite values (stat_density).
ggplot(redOWCSLAP, aes(x=TN_TP)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Open water TN:TP", x="TN:TP", y="Frequency")+
scale_x_continuous(limits=c(0,100))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 67 rows containing non-finite values (stat_bin).
## Warning: Removed 67 rows containing non-finite values (stat_density).
## Warning: Removed 4 rows containing missing values (geom_bar).
ggplot(redOWCSLAP, aes(x=Extracted_Chl.a)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Open water chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 24 rows containing non-finite values (stat_bin).
## Warning: Removed 24 rows containing non-finite values (stat_density).
ggplot(redOWCSLAP, aes(x=Secchi_Depth)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Secchi Depth", x="Secchi depth (m)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 30 rows containing non-finite values (stat_bin).
## Warning: Removed 30 rows containing non-finite values (stat_density).
ggplot(redOWCSLAP, aes(x=True_Color)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="True Color", x="True color (PTU)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 21 rows containing non-finite values (stat_bin).
## Warning: Removed 21 rows containing non-finite values (stat_density).
ggplot(redSBCSLAP, aes(x=ESF_FP_Chl.a)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Shoreline Bloom Chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(redSBCSLAP, aes(x=ESF_Microcystin)) +
geom_histogram(position="identity", color="black", fill="white")+
facet_wrap(~Dreissenids)+
geom_density()+
theme_minimal()+
labs(title="Shoreline Bloom Microcystin", x="Microcystin (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 55 rows containing non-finite values (stat_bin).
## Warning: Removed 55 rows containing non-finite values (stat_density).
## Warning: Groups with fewer than two data points have been dropped.
Because of the way the data were given to me, Secchi depth has to be done first on its own
hisCSLAP<-read.csv("./historical_data/Historical_CSLAP.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))
#extract sample year and month
hisCSLAP$Sample_Year<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%Y"))
hisCSLAP$Sample_Month<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%m"))
#read in invasion year information
Invasion_Year<-read.csv("./raw_data/InvasionYear.csv", header=TRUE, colClasses=c("factor", "numeric"))
#merge with hisCSLAP
hisCSLAP<-merge(hisCSLAP, Invasion_Year, all.x=TRUE)
#create new column `Years_Invaded` by subtracting `Invasion Year` from `Sample Year`
hisCSLAP$Years_Invaded<-hisCSLAP$Sample_Year-hisCSLAP$Invasion_Year
#turn `Sample_Year`, `Sample_Month`, and `Invasion_Year` back to factors
hisCSLAP$Sample_Year<-as.factor(hisCSLAP$Sample_Year)
hisCSLAP$Sample_Month<-as.factor(hisCSLAP$Sample_Month)
hisCSLAP$Invasion_Year<-as.factor(hisCSLAP$Invasion_Year)
#create column with 'Invaded' or "Uninvaded" based on `Years Invaded`
hisCSLAP$Dreissenids<-ifelse(hisCSLAP$Years_Invaded >= 0, "Invaded", "Uninvaded")
hisCSLAP$Dreissenids<-as.factor(hisCSLAP$Dreissenids)
#Aggregating yearly means by lake and years invaded
Secchi<-aggregate(Secchi_Depth_m ~ Lake_Name + Years_Invaded, data=hisCSLAP, FUN=mean)
Chl<-aggregate(Extracted_Chl_ug.L ~ Lake_Name + Years_Invaded, data=hisCSLAP, FUN=mean)
#t-test pre and post invasion and MEMs
hist(hisCSLAP$Secchi_Depth_m)
t.test(Secchi_Depth_m~Dreissenids, data=hisCSLAP)
##
## Welch Two Sample t-test
##
## data: Secchi_Depth_m by Dreissenids
## t = 10.37, df = 1054.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.6735570 0.9879379
## sample estimates:
## mean in group Invaded mean in group Uninvaded
## 4.672985 3.842238
Secchi.lmer<-lmer(Secchi_Depth_m ~ Dreissenids + (1|LakeID), data=hisCSLAP)
summary(Secchi.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Secchi_Depth_m ~ Dreissenids + (1 | LakeID)
## Data: hisCSLAP
##
## REML criterion at convergence: 4388.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9324 -0.6854 -0.0512 0.6419 3.5186
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 0.4754 0.6895
## Residual 1.5707 1.2533
## Number of obs: 1324, groups: LakeID, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.62638 0.25053 7.48842 18.466 1.62e-07 ***
## DreissenidsUninvaded -0.70457 0.07522 1320.02303 -9.367 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.175
ggplot(hisCSLAP, aes(x=Dreissenids, y=Secchi_Depth_m))+
geom_boxplot()+
theme_minimal()+
labs(y = "Secchi Depth (m)")
## Warning: Removed 779 rows containing non-finite values (stat_boxplot).
#ggsave("historical_Secchi.png")
#historicalCSLAP df edited to match DeRuyter and EatonBrook dfs
hisCSLAP<-read.csv("./historical_data/Historical_CSLAP_edited.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))
#DeRuyter and EatonBrook dfs
Eaton<-read.csv("./historical_data/EatonHistoricalClean2.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))
Der<-read.csv("./historical_data/DeruyterHistoricalClean2.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))
#change levels of Info_Type
hisCSLAP$Info_Type[hisCSLAP$Info_Type == "ow"]<-"OW"
hisCSLAP$Info_Type[hisCSLAP$Info_Type == "sb"]<-"SB"
hisCSLAP$Info_Type<-as.factor(hisCSLAP$Info_Type)
hisCSLAP$Info_Type<-droplevels(hisCSLAP$Info_Type)
#Merge hisCSLAP with Deruyter and Eatonbrook
#remove any observations from Eaton or DeRuyter from hisCSLAP
hisCSLAP<-hisCSLAP[hisCSLAP$Lake_Name != "De Ruyter Reservoir",]
hisCSLAP<-hisCSLAP[hisCSLAP$Lake_Name != "Eaton Brook Reservoir",]
#row bind cleaned DeRuyter and EatonBrook
hisCSLAP<-rbind(hisCSLAP, Eaton)
hisCSLAP<-rbind(hisCSLAP, Der)
#extract sample year and month
hisCSLAP$Sample_Year<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%Y"))
hisCSLAP$Sample_Month<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%m"))
#merge Invasion Year with hisCSLAP
hisCSLAP<-merge(hisCSLAP, Invasion_Year, all.x=TRUE)
#create new column `Years_Invaded` by subtracting `Invasion Year` from `Sample Year`
hisCSLAP$Years_Invaded<-hisCSLAP$Sample_Year-hisCSLAP$Invasion_Year
#turn `Sample_Year`, `Sample_Month`, and `Invasion_Year` back to factors
hisCSLAP$Sample_Year<-as.factor(hisCSLAP$Sample_Year)
hisCSLAP$Sample_Month<-as.factor(hisCSLAP$Sample_Month)
hisCSLAP$Invasion_Year<-as.factor(hisCSLAP$Invasion_Year)
#create column with 'Invaded' or "Uninvaded" based on `Years Invaded`
hisCSLAP$Dreissenids<-ifelse(hisCSLAP$Years_Invaded >= 0, "Invaded", "Uninvaded")
hisCSLAP$Dreissenids<-as.factor(hisCSLAP$Dreissenids)
#we will also remove and shoreline bloom samples
hisCSLAP<-hisCSLAP[hisCSLAP$Info_Type != "SB",]
hisCSLAP$Info_Type<-droplevels(hisCSLAP$Info_Type)
#create a TN:TP column
hisCSLAP$TN_TP<-hisCSLAP$TN_mg.L/hisCSLAP$TP_mg.L
#finally, create a df with only OW samples and a df with only BS samples
OWhisCSLAP<-hisCSLAP[hisCSLAP$Info_Type == "OW",]
BShisCSLAP<-hisCSLAP[hisCSLAP$Info_Type == "BS",]
Chl<-aggregate(Extracted_Chl_ug.L ~ Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)
OWTP<-aggregate(TP_mg.L ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)
BSTP<-aggregate(TP_mg.L ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "BS",], FUN=mean)
OWTN<-aggregate(TN_mg.L ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)
OWTN_TP<-aggregate(TN_TP ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)
TC<-aggregate(True_Color_PTU ~ Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)
Calc<-aggregate(Calcium_mg.L ~ Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)
hist(log(OWhisCSLAP$Extracted_Chl_ug.L))
t.test(Extracted_Chl_ug.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
##
## Welch Two Sample t-test
##
## data: Extracted_Chl_ug.L by Dreissenids
## t = -12.443, df = 1320.3, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.624423 -1.909601
## sample estimates:
## mean in group Invaded mean in group Uninvaded
## 2.492524 4.759536
Chl.lmer<-lmer(log(Extracted_Chl_ug.L) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
summary(Chl.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Extracted_Chl_ug.L) ~ Dreissenids + (1 | LakeID)
## Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
##
## REML criterion at convergence: 3050.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0999 -0.5200 -0.0071 0.5971 4.1661
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 0.04857 0.2204
## Residual 0.57543 0.7586
## Number of obs: 1323, groups: LakeID, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.034e-01 8.519e-02 7.636e+00 7.083 0.00013 ***
## DreissenidsUninvaded 6.015e-01 4.518e-02 1.318e+03 13.312 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.306
plot(Chl.lmer)
hist(log(OWhisCSLAP$TP_mg.L))
t.test(TP_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
##
## Welch Two Sample t-test
##
## data: TP_mg.L by Dreissenids
## t = 3.1251, df = 760.3, p-value = 0.001845
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.0003861491 0.0016908695
## sample estimates:
## mean in group Invaded mean in group Uninvaded
## 0.01197581 0.01093730
OWTP.lmer<-lmer(log(TP_mg.L) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
summary(OWTP.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TP_mg.L) ~ Dreissenids + (1 | LakeID)
## Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
##
## REML criterion at convergence: 868.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.0407 -0.6192 -0.0512 0.5392 6.2139
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 0.03243 0.1801
## Residual 0.11009 0.3318
## Number of obs: 1315, groups: LakeID, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -4.48194 0.06539 7.42433 -68.539 1.13e-11 ***
## DreissenidsUninvaded -0.11493 0.01972 1310.85322 -5.827 7.10e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.171
plot(OWTP.lmer)
hist(log(BShisCSLAP$TP_mg.L))
t.test(TP_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "BS",])
##
## Welch Two Sample t-test
##
## data: TP_mg.L by Dreissenids
## t = -2.6076, df = 243.03, p-value = 0.009682
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.08319781 -0.01159341
## sample estimates:
## mean in group Invaded mean in group Uninvaded
## 0.04436469 0.09176031
BSTP.lmer<-lmer(log(TP_mg.L) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "BS",])
summary(BSTP.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TP_mg.L) ~ Dreissenids + (1 | LakeID)
## Data: hisCSLAP[hisCSLAP$Info_Type == "BS", ]
##
## REML criterion at convergence: 1778.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4685 -0.5772 -0.1504 0.5627 4.1494
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 0.4817 0.6941
## Residual 0.8566 0.9255
## Number of obs: 651, groups: LakeID, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -3.78692 0.24993 7.12997 -15.152 1.1e-06 ***
## DreissenidsUninvaded 0.33838 0.08942 647.96512 3.784 0.000169 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.106
plot(BSTP.lmer)
hist(log(OWhisCSLAP$TN_mg.L))
t.test(TN_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
##
## Welch Two Sample t-test
##
## data: TN_mg.L by Dreissenids
## t = -0.82352, df = 240.57, p-value = 0.411
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.12748917 0.05231878
## sample estimates:
## mean in group Invaded mean in group Uninvaded
## 0.4895265 0.5271117
OWTN.lmer<-lmer(log(TN_mg.L) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
summary(OWTN.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN_mg.L) ~ Dreissenids + (1 | LakeID)
## Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
##
## REML criterion at convergence: 880.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.3884 -0.5927 -0.0039 0.5499 5.6278
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 0.2847 0.5336
## Residual 0.2084 0.4565
## Number of obs: 661, groups: LakeID, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.855286 0.189889 7.030046 -4.504 0.00275 **
## DreissenidsUninvaded 0.004495 0.048195 655.931431 0.093 0.92572
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.060
hist(log(OWhisCSLAP$TN_TP))
t.test(TN_TP ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
##
## Welch Two Sample t-test
##
## data: TN_TP by Dreissenids
## t = -0.65903, df = 251.56, p-value = 0.5105
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -10.553469 5.261396
## sample estimates:
## mean in group Invaded mean in group Uninvaded
## 45.67168 48.31771
OWTN_TP.lmer<-lmer(log(TN_TP) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
summary(OWTN_TP.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN_TP) ~ Dreissenids + (1 | LakeID)
## Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
##
## REML criterion at convergence: 1073.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.2261 -0.5569 -0.0289 0.5560 4.2435
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 0.2491 0.4991
## Residual 0.2877 0.5364
## Number of obs: 651, groups: LakeID, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.62840 0.17831 7.05445 20.35 1.59e-07 ***
## DreissenidsUninvaded 0.06740 0.05665 647.39946 1.19 0.235
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.075
t.test(True_Color_PTU ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
##
## Welch Two Sample t-test
##
## data: True_Color_PTU by Dreissenids
## t = 11.754, df = 1008.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 5.747462 8.051237
## sample estimates:
## mean in group Invaded mean in group Uninvaded
## 14.263158 7.363808
TC.lmer<-lmer(True_Color_PTU ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
summary(TC.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: True_Color_PTU ~ Dreissenids + (1 | LakeID)
## Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
##
## REML criterion at convergence: 9688.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3027 -0.5143 -0.1901 0.2417 7.1496
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 44.38 6.662
## Residual 85.84 9.265
## Number of obs: 1325, groups: LakeID, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 15.1934 2.3925 7.0424 6.351 0.000376 ***
## DreissenidsUninvaded -5.8055 0.5509 1319.0393 -10.537 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.132
t.test(Calcium_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
##
## Welch Two Sample t-test
##
## data: Calcium_mg.L by Dreissenids
## t = -0.034601, df = 64.648, p-value = 0.9725
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.422633 4.272010
## sample estimates:
## mean in group Invaded mean in group Uninvaded
## 30.53299 30.60830
Calc.lmer<-lmer(Calcium_mg.L ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
summary(Calc.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Calcium_mg.L ~ Dreissenids + (1 | LakeID)
## Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
##
## REML criterion at convergence: 858.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7693 -0.3958 -0.0114 0.3219 3.5680
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 88.02 9.382
## Residual 43.69 6.610
## Number of obs: 127, groups: LakeID, 8
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 31.031 3.414 7.283 9.090 3.13e-05 ***
## DreissenidsUninvaded 2.383 1.637 122.621 1.455 0.148
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.149
ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=Extracted_Chl_ug.L)) +
geom_boxplot()+
theme_bw()+
labs(y = "Extracted Chlorophyll-a (mg/L)")
## Warning: Removed 54 rows containing non-finite values (stat_boxplot).
#ggsave("./output_figures/historical_Chl.png")
ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=TP_mg.L)) +
geom_boxplot()+
theme_bw()+
labs(y = "Open Water TP (mg/L)")
## Warning: Removed 62 rows containing non-finite values (stat_boxplot).
#ggsave("./output_figures/historical_OWTP.png")
ggplot(hisCSLAP[hisCSLAP$Info_Type == "BS",], aes(x=Dreissenids, y=TP_mg.L)) +
geom_boxplot()+
theme_bw()+
labs(y = "Bottom Sample TP (mg/L)")
## Warning: Removed 228 rows containing non-finite values (stat_boxplot).
#ggsave("./output_figures/historical_BSTP.png")
ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=TN_mg.L)) +
geom_boxplot()+
theme_bw()+
labs(y = "Open Water TN (mg/L)")
## Warning: Removed 716 rows containing non-finite values (stat_boxplot).
#ggsave("./output_figures/historical_OWTN.png")
ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=TN_TP)) +
geom_boxplot()+
theme_bw()+
labs(y = "Open Water TN:TP")
## Warning: Removed 726 rows containing non-finite values (stat_boxplot).
#ggsave("./output_figures/historical_OWTN_TP.png")
ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=True_Color_PTU)) +
geom_boxplot()+
theme_bw()+
labs(y = "Open Water True Color")
## Warning: Removed 52 rows containing non-finite values (stat_boxplot).
#ggsave("./output_figures/historical_OWTC.png")
ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=Calcium_mg.L)) +
geom_boxplot()+
theme_bw()+
labs(y = "Open Water Calcium")
## Warning: Removed 1250 rows containing non-finite values (stat_boxplot).
#ggsave("./output_figures/historical_OWCalcium.png")
TNTP<-lmer(log(TN_TP+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag + (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)
summary(TNTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN_TP + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +
## Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |
## LakeID) + (1 | Sample_Year:LakeID)
## Data: OWCSLAP
##
## REML criterion at convergence: 4288.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -14.8325 -0.4551 0.0402 0.4806 9.2136
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample_Year:LakeID (Intercept) 0.052851 0.22989
## LakeID (Intercept) 0.058346 0.24155
## Sample_Month (Intercept) 0.001713 0.04139
## Sample_Year (Intercept) 0.009578 0.09787
## Residual 0.255404 0.50538
## Number of obs: 2601, groups:
## Sample_Year:LakeID, 342; LakeID, 67; Sample_Month, 6; Sample_Year, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.183775 0.165460 79.439722 19.242 <2e-16 ***
## DreissenidsUninvaded -0.032480 0.105180 92.981532 -0.309 0.7582
## log(CA.SA) 0.048601 0.033665 61.368921 1.444 0.1539
## log(Mean_Depth_m) 0.125403 0.052248 63.715137 2.400 0.0193 *
## Percent_Ag 0.001651 0.002880 66.035094 0.573 0.5683
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.673
## log(CA.SA) -0.458 0.044
## lg(Mn_Dpt_) -0.520 0.087 -0.040
## Percent_Ag -0.279 0.326 -0.044 -0.053
plot(TNTP)
qqPlot(resid(TNTP))
## 1395 4458
## 711 2353
BSTP<-lmer(log(TP+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag + (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=BSCSLAP)
summary(BSTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TP + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +
## Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |
## LakeID) + (1 | Sample_Year:LakeID)
## Data: BSCSLAP
##
## REML criterion at convergence: 3060.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.4588 -0.4356 -0.0638 0.3495 4.9671
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample_Year:LakeID (Intercept) 0.081919 0.28621
## LakeID (Intercept) 0.387229 0.62228
## Sample_Month (Intercept) 0.007195 0.08482
## Sample_Year (Intercept) 0.003081 0.05551
## Residual 0.243055 0.49301
## Number of obs: 1808, groups:
## Sample_Year:LakeID, 269; LakeID, 53; Sample_Month, 6; Sample_Year, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -3.550746 0.448242 60.210119 -7.921 6.52e-11 ***
## DreissenidsUninvaded 0.246827 0.206337 134.468860 1.196 0.23371
## log(CA.SA) -0.013752 0.090666 48.401520 -0.152 0.88007
## log(Mean_Depth_m) -0.145212 0.203496 48.540796 -0.714 0.47890
## Percent_Ag 0.022462 0.006911 50.849848 3.250 0.00205 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.478
## log(CA.SA) -0.244 0.063
## lg(Mn_Dpt_) -0.740 0.022 -0.252
## Percent_Ag -0.346 0.256 -0.116 0.177
plot(BSTP)
qqPlot(resid(BSTP))
## 3945 946
## 1445 355
Full model error: Model failed to converge
-Remove Sample_Month. Sample_Year, Sample_Year:LakeID (low variance) to resolve error
TN<-lmer(log(TN+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag + (1|LakeID), data=OWCSLAP)
summary(TN)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +
## Percent_Ag + (1 | LakeID)
## Data: OWCSLAP
##
## REML criterion at convergence: 2964
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.3622 -0.5006 0.0011 0.4902 8.9046
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 0.06457 0.2541
## Residual 0.16644 0.4080
## Number of obs: 2638, groups: LakeID, 67
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.977263 0.140314 91.572863 -6.965 4.89e-10 ***
## DreissenidsUninvaded -0.075003 0.083028 198.446052 -0.903 0.36744
## log(CA.SA) 0.047215 0.032257 62.385805 1.464 0.14828
## log(Mean_Depth_m) -0.137653 0.049696 63.297223 -2.770 0.00735 **
## Percent_Ag 0.012407 0.002696 67.534806 4.602 1.90e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.628
## log(CA.SA) -0.512 0.042
## lg(Mn_Dpt_) -0.568 0.071 -0.034
## Percent_Ag -0.238 0.276 -0.053 -0.063
plot(TN)
qqPlot(resid(TN))
## 3666 1395
## 1966 711
Chl<-lmer(log(Extracted_Chl.a+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag + (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)
summary(Chl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## log(Extracted_Chl.a + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +
## Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |
## LakeID) + (1 | Sample_Year:LakeID)
## Data: OWCSLAP
##
## REML criterion at convergence: 5949.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.5567 -0.4517 0.0417 0.5127 4.1829
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample_Year:LakeID (Intercept) 0.05400 0.2324
## LakeID (Intercept) 0.21540 0.4641
## Sample_Month (Intercept) 0.02576 0.1605
## Sample_Year (Intercept) 0.01314 0.1146
## Residual 0.49917 0.7065
## Number of obs: 2591, groups:
## Sample_Year:LakeID, 342; LakeID, 67; Sample_Month, 6; Sample_Year, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.355434 0.282568 85.411626 4.797 6.77e-06 ***
## DreissenidsUninvaded -0.146767 0.167536 127.257977 -0.876 0.38266
## log(CA.SA) 0.037582 0.060147 56.206917 0.625 0.53461
## log(Mean_Depth_m) -0.295017 0.092811 57.298075 -3.179 0.00239 **
## Percent_Ag 0.002679 0.005059 60.535425 0.529 0.59842
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.628
## log(CA.SA) -0.476 0.042
## lg(Mn_Dpt_) -0.533 0.078 -0.033
## Percent_Ag -0.244 0.294 -0.052 -0.061
plot(Chl)
qqPlot(resid(Chl))
## 1989 3530
## 1003 1856
Secchi<-lmer(log(Secchi_Depth+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag + (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)
summary(Secchi)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## log(Secchi_Depth + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +
## Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |
## LakeID) + (1 | Sample_Year:LakeID)
## Data: OWCSLAP
##
## REML criterion at convergence: -311.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.9201 -0.5351 0.0207 0.5662 4.7949
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample_Year:LakeID (Intercept) 1.150e-02 0.107227
## LakeID (Intercept) 7.080e-02 0.266084
## Sample_Month (Intercept) 4.551e-05 0.006746
## Sample_Year (Intercept) 8.639e-04 0.029393
## Residual 4.079e-02 0.201956
## Number of obs: 2590, groups:
## Sample_Year:LakeID, 342; LakeID, 67; Sample_Month, 6; Sample_Year, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.063472 0.140820 95.144934 7.552 2.60e-11 ***
## DreissenidsUninvaded -0.091870 0.078020 230.031893 -1.178 0.2402
## log(CA.SA) -0.071893 0.033512 62.546322 -2.145 0.0358 *
## log(Mean_Depth_m) 0.336271 0.051584 63.305400 6.519 1.34e-08 ***
## Percent_Ag -0.002311 0.002784 68.034599 -0.830 0.4093
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.586
## log(CA.SA) -0.523 0.034
## lg(Mn_Dpt_) -0.579 0.064 -0.033
## Percent_Ag -0.212 0.250 -0.059 -0.068
plot(Secchi)
qqPlot(resid(Secchi))
## 4582 4641
## 2411 2440
TC<-lmer(log(True_Color+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag + (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)
summary(TC)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## log(True_Color + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +
## Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |
## LakeID) + (1 | Sample_Year:LakeID)
## Data: OWCSLAP
##
## REML criterion at convergence: 2860.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.0242 -0.4258 0.0510 0.5180 3.4438
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample_Year:LakeID (Intercept) 0.06792 0.2606
## LakeID (Intercept) 0.18192 0.4265
## Sample_Month (Intercept) 0.01635 0.1279
## Sample_Year (Intercept) 0.07901 0.2811
## Residual 0.13060 0.3614
## Number of obs: 2612, groups:
## Sample_Year:LakeID, 343; LakeID, 67; Sample_Month, 6; Sample_Year, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.179e+00 2.738e-01 7.195e+01 7.957 1.85e-11 ***
## DreissenidsUninvaded 3.631e-01 1.480e-01 1.507e+02 2.453 0.01532 *
## log(CA.SA) 1.552e-01 5.483e-02 6.103e+01 2.830 0.00629 **
## log(Mean_Depth_m) -2.406e-01 8.455e-02 6.207e+01 -2.846 0.00599 **
## Percent_Ag -2.579e-04 4.599e-03 6.587e+01 -0.056 0.95545
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.572
## log(CA.SA) -0.446 0.039
## lg(Mn_Dpt_) -0.498 0.074 -0.033
## Percent_Ag -0.218 0.285 -0.054 -0.063
plot(TC)
qqPlot(resid(TC))
## 3656 4774
## 1940 2531
Full model error: boundary (singular) fit and model failed to converge
Remove Sample_Year, Sample_Month, and Sample_Year:LakeID (low variance) to resolve
SBChl<-lmer(log(ESF_FP_Chl.a+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag + (1|LakeID), data=SBCSLAP)
summary(SBChl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## log(ESF_FP_Chl.a + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +
## Percent_Ag + (1 | LakeID)
## Data: SBCSLAP
##
## REML criterion at convergence: 990.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.96703 -0.56007 0.06596 0.62799 2.26269
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 2.293 1.514
## Residual 4.428 2.104
## Number of obs: 219, groups: LakeID, 42
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.12907 1.56564 35.94401 3.276 0.00234 **
## DreissenidsUninvaded -0.21254 0.98272 46.03089 -0.216 0.82973
## log(CA.SA) 0.26531 0.33747 35.26516 0.786 0.43701
## log(Mean_Depth_m) -0.42431 0.50696 28.67188 -0.837 0.40953
## Percent_Ag 0.03699 0.02638 35.94149 1.402 0.16953
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.710
## log(CA.SA) -0.503 0.070
## lg(Mn_Dpt_) -0.544 0.071 0.058
## Percent_Ag -0.392 0.501 -0.077 -0.090
plot(SBChl)
qqPlot(resid(SBChl))
## 614 4310
## 44 195
We want to add TN:TP as a predictor (fixed effect), but these variables aren’t collected with shoreline bloom data. So we will instead use the yearly average for a lake.
#Extracting average annual TN_TP for each lake
library(plyr)
avgTNTP<-ddply(OWCSLAP, c("LakeID", "Sample_Year"), summarize,
Mean = mean(TN_TP, na.rm=TRUE))
colnames(avgTNTP)[colnames(avgTNTP)=="Mean"] <- "TN_TP"
#Merge these values to the SBCSLAP df
SBCSLAP<-SBCSLAP[,c(1:49, 51:54)]
SBCSLAP<-merge(SBCSLAP, avgTNTP, by=c("LakeID", "Sample_Year"), all.x=TRUE, all.y=FALSE)
Full model error: boundary (singular) fit
Remove Sample_Month and Sample_Year:LakeID to resolve
SBmicro<-lmer(log(ESF_Microcystin) ~ Dreissenids + TN_TP + log(CA.SA) + Mean_Depth_m + Percent_Ag + (1|Sample_Year) + (1|LakeID), data=SBCSLAP)
summary(SBmicro)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## log(ESF_Microcystin) ~ Dreissenids + TN_TP + log(CA.SA) + Mean_Depth_m +
## Percent_Ag + (1 | Sample_Year) + (1 | LakeID)
## Data: SBCSLAP
##
## REML criterion at convergence: 176.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.28513 -0.53162 0.06219 0.53296 1.59305
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 1.846 1.359
## Sample_Year (Intercept) 5.793 2.407
## Residual 2.281 1.510
## Number of obs: 43, groups: LakeID, 7; Sample_Year, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 16.56817 6.64246 1.83807 2.494 0.141
## DreissenidsUninvaded -1.70297 2.24234 0.83298 -0.759 0.606
## TN_TP -0.02691 0.02376 5.71424 -1.133 0.303
## log(CA.SA) -3.02465 2.17918 1.75354 -1.388 0.315
## Mean_Depth_m -0.66268 0.44983 1.60063 -1.473 0.307
## Percent_Ag -0.06315 0.06490 1.31865 -0.973 0.475
##
## Correlation of Fixed Effects:
## (Intr) DrssnU TN_TP l(CA.S Mn_Dp_
## DrssndsUnnv -0.285
## TN_TP -0.031 -0.049
## log(CA.SA) -0.850 -0.154 -0.040
## Mean_Dpth_m -0.745 0.169 -0.283 0.583
## Percent_Ag -0.694 0.596 -0.245 0.415 0.506
plot(SBmicro)
qqPlot(resid(SBmicro))
## 60 36
## 23 16
redTNTP<-lmer(log(TN_TP+.01) ~ Dreissenids + (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=redOWCSLAP)
summary(redTNTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## log(TN_TP + 0.01) ~ Dreissenids + (1 | Sample_Year) + (1 | Sample_Month) +
## (1 | LakeID) + (1 | Sample_Year:LakeID)
## Data: redOWCSLAP
##
## REML criterion at convergence: 1053
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -14.6301 -0.3793 0.0670 0.4569 4.2824
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample_Year:LakeID (Intercept) 5.119e-02 0.226251
## LakeID (Intercept) 1.933e-01 0.439666
## Sample_Month (Intercept) 4.381e-05 0.006619
## Sample_Year (Intercept) 3.952e-03 0.062861
## Residual 2.563e-01 0.506224
## Number of obs: 631, groups:
## Sample_Year:LakeID, 82; LakeID, 16; Sample_Month, 6; Sample_Year, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.57036 0.14745 22.47365 24.21 <2e-16 ***
## DreissenidsUninvaded -0.09378 0.17050 47.13269 -0.55 0.585
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.604
plot(redTNTP)
qqPlot(resid(redTNTP))
## 359 866
## 152 401
redBSTP<-lmer(log(TP+.01) ~ Dreissenids + (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=redBSCSLAP)
summary(redBSTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## log(TP + 0.01) ~ Dreissenids + (1 | Sample_Year) + (1 | Sample_Month) +
## (1 | LakeID) + (1 | Sample_Year:LakeID)
## Data: redBSCSLAP
##
## REML criterion at convergence: 941.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9133 -0.5193 -0.0992 0.3425 4.2140
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample_Year:LakeID (Intercept) 0.08718 0.29527
## LakeID (Intercept) 0.19656 0.44335
## Sample_Month (Intercept) 0.01368 0.11695
## Sample_Year (Intercept) 0.00338 0.05813
## Residual 0.23610 0.48590
## Number of obs: 567, groups:
## Sample_Year:LakeID, 86; LakeID, 16; Sample_Month, 6; Sample_Year, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -3.41111 0.16546 21.79854 -20.616 8.73e-16 ***
## DreissenidsUninvaded -0.06031 0.19384 32.13280 -0.311 0.758
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.609
plot(redBSTP)
qqPlot(resid(redBSTP))
## 930 870
## 393 367
redChl<-lmer(log(Extracted_Chl.a+1) ~ Dreissenids + (1|LakeID) + (1|Sample_Year) + (1|Sample_Month) + (1|Sample_Year:LakeID), data=redOWCSLAP)
summary(redChl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Extracted_Chl.a + 1) ~ Dreissenids + (1 | LakeID) + (1 |
## Sample_Year) + (1 | Sample_Month) + (1 | Sample_Year:LakeID)
## Data: redOWCSLAP
##
## REML criterion at convergence: 888.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6791 -0.6123 -0.0104 0.5567 3.7645
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample_Year:LakeID (Intercept) 0.0245791 0.1568
## LakeID (Intercept) 0.0623684 0.2497
## Sample_Month (Intercept) 0.0110021 0.1049
## Sample_Year (Intercept) 0.0009305 0.0305
## Residual 0.2031526 0.4507
## Number of obs: 635, groups:
## Sample_Year:LakeID, 83; LakeID, 16; Sample_Month, 6; Sample_Year, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.21908 0.10166 18.65722 11.991 3.29e-10 ***
## DreissenidsUninvaded -0.03321 0.11175 28.88199 -0.297 0.768
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.573
plot(redChl)
qqPlot(resid(redChl))
## 433 161
## 199 69
-Sample_Year and Sample_Month removed for having low variance
redSecchi <- lmer(log(Secchi_Depth) ~ Dreissenids + (1|LakeID)+ (1|Sample_Year) + (1|Sample_Month) + (1|Sample_Year:LakeID), data=redOWCSLAP)
summary(redSecchi)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Secchi_Depth) ~ Dreissenids + (1 | LakeID) + (1 | Sample_Year) +
## (1 | Sample_Month) + (1 | Sample_Year:LakeID)
## Data: redOWCSLAP
##
## REML criterion at convergence: 151
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0631 -0.5199 -0.0037 0.5757 3.9560
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample_Year:LakeID (Intercept) 0.0221217 0.14873
## LakeID (Intercept) 0.0470875 0.21700
## Sample_Month (Intercept) 0.0002708 0.01646
## Sample_Year (Intercept) 0.0005295 0.02301
## Residual 0.0582462 0.24134
## Number of obs: 629, groups:
## Sample_Year:LakeID, 82; LakeID, 16; Sample_Month, 6; Sample_Year, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.44876 0.07569 21.04598 19.140 8.61e-15 ***
## DreissenidsUninvaded -0.08699 0.09117 39.48953 -0.954 0.346
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.627
plot(redSecchi)
qqPlot(resid(redSecchi))
## 959 1261
## 439 595
Full model error: Model failed to converge
Remove Sample_Year and Sample_Month (low variance) to resolve
redOWTC<-lmer(True_Color ~ Dreissenids + (1|LakeID) + + (1|Sample_Year:LakeID), data=redOWCSLAP)
summary(redOWTC)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: True_Color ~ Dreissenids + (1 | LakeID) + +(1 | Sample_Year:LakeID)
## Data: redOWCSLAP
##
## REML criterion at convergence: 3965.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.2594 -0.4913 -0.0763 0.4095 5.1558
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample_Year:LakeID (Intercept) 49.79 7.056
## LakeID (Intercept) 18.05 4.249
## Residual 19.51 4.417
## Number of obs: 638, groups: Sample_Year:LakeID, 83; LakeID, 16
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 12.204 1.860 15.369 6.562 7.98e-06 ***
## DreissenidsUninvaded 2.918 2.497 19.403 1.168 0.257
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.699
plot(redOWTC)
qqPlot(resid(redOWTC))
## 318 311
## 140 135
-Sample_Year removed for low variance
redSBChl<-lmer(log(ESF_FP_Chl.a) ~ Dreissenids + (1|Sample_Month) + (1|LakeID) , data=redSBCSLAP)
summary(redSBChl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(ESF_FP_Chl.a) ~ Dreissenids + (1 | Sample_Month) + (1 | LakeID)
## Data: redSBCSLAP
##
## REML criterion at convergence: 342.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.49435 -0.52241 0.07225 0.49418 1.98859
##
## Random effects:
## Groups Name Variance Std.Dev.
## LakeID (Intercept) 2.057 1.434
## Sample_Month (Intercept) 1.822 1.350
## Residual 4.384 2.094
## Number of obs: 76, groups: LakeID, 10; Sample_Month, 6
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 5.9401 0.9878 7.9946 6.014 0.000319 ***
## DreissenidsUninvaded -0.2759 1.0549 12.7054 -0.262 0.797879
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.534
plot(redSBChl)
qqPlot(resid(redSBChl))
## 777 319
## 58 26
Same as for the global dataset, we need to use average yearly TN:TP values
#Extracting average annual TP for each lake
library(plyr)
redavgTNTP<-ddply(redOWCSLAP, c("LakeID", "Sample_Year"), summarize,
Mean = mean(TN_TP, na.rm=TRUE))
colnames(redavgTNTP)[colnames(redavgTNTP)=="Mean"] <- "TN_TP"
#Merge these values to the SBCSLAP df
redSBCSLAP<-redSBCSLAP[,c(1:49, 51:54)]
redSBCSLAP<-merge(redSBCSLAP, redavgTNTP, by=c("LakeID", "Sample_Year"), all.x=TRUE, all.y=FALSE)
Full model error: failed to converge
Remove Sample_Month, LakeID, and Sample_Year:LakeID to resolve
redSBmicro<-lmer(log(ESF_Microcystin) ~ Dreissenids + log(TN_TP) + (1|Sample_Year) , data=redSBCSLAP)
summary(redSBmicro)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(ESF_Microcystin) ~ Dreissenids + log(TN_TP) + (1 | Sample_Year)
## Data: redSBCSLAP
##
## REML criterion at convergence: 65.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.05259 -0.49531 0.08973 0.52344 1.73701
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample_Year (Intercept) 0.6936 0.8328
## Residual 1.8834 1.3724
## Number of obs: 20, groups: Sample_Year, 5
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.4301 3.9041 1.6197 1.647 0.269
## DreissenidsUninvaded 1.8504 1.7803 4.7588 1.039 0.349
## log(TN_TP) -0.7165 0.8957 1.5288 -0.800 0.529
##
## Correlation of Fixed Effects:
## (Intr) DrssnU
## DrssndsUnnv -0.343
## log(TN_TP) -0.990 0.303
plot(redSBmicro)
qqPlot(resid(redSBmicro))
## 25 19
## 15 13
#Global Dataset
#create column based on 25 ug/L BGA guideline (from NYS DEC)
CSLAP$Bloom <- ifelse(CSLAP$ESF_FP_BGA >=25.0, "Bloom", "No Bloom")
CSLAP$Bloom <- as.factor(CSLAP$Bloom)
#Separate based on invasion status and drop unused levels of Lake Name
I.CSLAP<-CSLAP[CSLAP$Dreissenids == "Invaded",]
I.CSLAP$Lake_Name<-droplevels(I.CSLAP$Lake_Name)
U.CSLAP<-CSLAP[CSLAP$Dreissenids == "Uninvaded",]
U.CSLAP$Lake_Name<-droplevels(U.CSLAP$Lake_Name)
#Reduced Dataset
#create column based on 25 ug/L BGA guideline (from NYS DEC)
redCSLAP$Bloom <- ifelse(redCSLAP$ESF_FP_BGA >=25.0, "Bloom", "No Bloom")
redCSLAP$Bloom <- as.factor(redCSLAP$Bloom)
#Separate based on invasion status and drop unused levels of Lake Name
I.redCSLAP<-redCSLAP[redCSLAP$Dreissenids == "Invaded",]
I.redCSLAP$Lake_Name<-droplevels(I.redCSLAP$Lake_Name)
U.redCSLAP<-redCSLAP[redCSLAP$Dreissenids == "Uninvaded",]
U.redCSLAP$Lake_Name<-droplevels(U.redCSLAP$Lake_Name)
table(CSLAP[, c("Bloom", "Dreissenids")])
## Dreissenids
## Bloom Invaded Uninvaded
## Bloom 42 98
## No Bloom 326 2355
HABFreqYear<-as.data.frame(table(CSLAP[,c("Bloom","Dreissenids","Sample_Year")]))
HABFreqLake<-as.data.frame(table(CSLAP[,c("Bloom","Dreissenids","Lake_Name")]))
#separate by invaded and uninvaded
I.blooms<-as.data.frame(table(I.CSLAP[,c("Lake_Name","Bloom")]))
I.blooms$Dreissenids<-rep("Invaded", 16)
U.blooms<-as.data.frame(table(U.CSLAP[,c("Lake_Name","Bloom")]))
U.blooms$Dreissenids<-rep("Uninvaded", 124)
#Make into one dataframe for analysis
blooms<-rbind(I.blooms, U.blooms)
#Remove 'no blooms'
blooms<-blooms[blooms$Bloom == "Bloom",]
fit<-aov(Freq ~ Dreissenids, data=blooms)
summary(fit)
## Df Sum Sq Mean Sq F value Pr(>F)
## Dreissenids 1 95.4 95.40 4.652 0.0346 *
## Residuals 68 1394.6 20.51
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(fit)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Freq ~ Dreissenids, data = blooms)
##
## $Dreissenids
## diff lwr upr p adj
## Uninvaded-Invaded -3.669355 -7.064224 -0.2744858 0.034559
hist(blooms$Freq)
model<-glmer(Freq ~ Dreissenids + (1|Lake_Name), family=poisson(link=log), data=blooms)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name)
## Data: blooms
##
## AIC BIC logLik deviance df.resid
## 238.7 245.5 -116.4 232.7 67
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.5723 -0.4914 -0.4914 0.2667 0.9971
##
## Random effects:
## Groups Name Variance Std.Dev.
## Lake_Name (Intercept) 2.755 1.66
## Number of obs: 70, groups: Lake_Name, 68
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2140 0.7002 -0.306 0.76
## DreissenidsUninvaded -0.5421 0.6722 -0.806 0.42
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.887
res<-simulateResiduals(model)
plot(res)
testResiduals(res)
## $uniformity
##
## One-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.07856, p-value = 0.7508
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## ratioObsSim = 0.87134, p-value = 0.832
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test
##
## data: simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 70.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
## $uniformity
##
## One-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.07856, p-value = 0.7508
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## ratioObsSim = 0.87134, p-value = 0.832
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test
##
## data: simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 70.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
testZeroInflation(res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0.97522, p-value = 0.88
## alternative hypothesis: two.sided
redHABFreqYear<-as.data.frame(table(redCSLAP[,c("Bloom","Dreissenids","Sample_Year")]))
##Data organization for analysis
#separate by invaded and uninvaded
I.redblooms<-as.data.frame(table(I.redCSLAP[,c("Lake_Name","Bloom")]))
I.redblooms$Dreissenids<-rep("Invaded", 16)
U.redblooms<-as.data.frame(table(U.redCSLAP[,c("Lake_Name","Bloom")]))
U.redblooms$Dreissenids<-rep("Uninvaded", 20)
#Make into one dataframe for analysis
redblooms<-rbind(I.redblooms, U.redblooms)
#Remove 'no blooms'
redblooms<-redblooms[redblooms$Bloom == "Bloom",]
redfit<-aov(Freq ~ Dreissenids, data=redblooms)
summary(redfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## Dreissenids 1 80.3 80.28 2.312 0.148
## Residuals 16 555.5 34.72
TukeyHSD(redfit)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Freq ~ Dreissenids, data = redblooms)
##
## $Dreissenids
## diff lwr upr p adj
## Uninvaded-Invaded -4.25 -10.17502 1.675019 0.1478745
hist(redblooms$Freq)
redmodel<-glmer(Freq ~ Dreissenids + (1|Lake_Name), family=poisson(link=log), data=redblooms)
summary(redmodel)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name)
## Data: redblooms
##
## AIC BIC logLik deviance df.resid
## 75.1 77.8 -34.6 69.1 15
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.62869 -0.57154 0.08755 0.23899 0.73619
##
## Random effects:
## Groups Name Variance Std.Dev.
## Lake_Name (Intercept) 2.376 1.541
## Number of obs: 18, groups: Lake_Name, 16
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.01081 0.73633 0.015 0.988
## DreissenidsUninvaded -0.29741 0.79320 -0.375 0.708
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.699
redres<-simulateResiduals(redmodel)
plot(redres)
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.25. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.5. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.75. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.
testResiduals(redres)
## $uniformity
##
## One-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.17329, p-value = 0.5924
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## ratioObsSim = 1.1072, p-value = 0.528
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test
##
## data: simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 18.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
## $uniformity
##
## One-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.17329, p-value = 0.5924
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## ratioObsSim = 1.1072, p-value = 0.528
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test
##
## data: simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 18.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
testZeroInflation(redres)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0.91051, p-value = 0.952
## alternative hypothesis: two.sided
HTBloom<-CSLAP[which(CSLAP$Bloom == "Bloom"), ]
#create df of only OW blooms, then use 10 ug/L microcystin to get HT blooms (DEC open water guideline)
OWHTBloom<-HTBloom[HTBloom$Info_Type == "OW",]
OWHTBloom$HTBloom <- ifelse(OWHTBloom$ESF_Microcystin >= 10.0, "HT Bloom", "No HT Bloom")
#create df of only SB blooms, then use 20 ug/L microcystin to get HT blooms (DEC shoreline guideline)
SBHTBloom<-HTBloom[HTBloom$Info_Type == "SB",]
SBHTBloom$HTBloom <- ifelse(SBHTBloom$ESF_Microcystin >= 20.0, "HT Bloom", "No HT Bloom")
#Separate based on invasion status and drop unused levels of Lake Name
I.SBHTBloom<-SBHTBloom[SBHTBloom$Dreissenids == "Invaded",]
I.SBHTBloom$Lake_Name<-droplevels(I.SBHTBloom$Lake_Name)
U.SBHTBloom<-SBHTBloom[SBHTBloom$Dreissenids == "Uninvaded",]
U.SBHTBloom$Lake_Name<-droplevels(U.SBHTBloom$Lake_Name)
#separate by invaded and uninvaded
I.HTblooms<-as.data.frame(table(I.SBHTBloom[,c("Lake_Name","HTBloom")]))
I.HTblooms$Dreissenids<-rep("Invaded", 8)
U.HTblooms<-as.data.frame(table(U.SBHTBloom[,c("Lake_Name","HTBloom")]))
U.HTblooms$Dreissenids<-rep("Uninvaded", 56)
#Make into one dataframe for analysis
HTblooms<-rbind(I.HTblooms, U.HTblooms)
#Remove 'no blooms'
HTblooms<-HTblooms[HTblooms$HTBloom == "HT Bloom",]
HTfit<-aov(Freq ~ Dreissenids, data=HTblooms)
summary(HTfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## Dreissenids 1 27.2 27.16 2.479 0.126
## Residuals 30 328.7 10.96
TukeyHSD(HTfit)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Freq ~ Dreissenids, data = HTblooms)
##
## $Dreissenids
## diff lwr upr p adj
## Uninvaded-Invaded -2.785714 -6.399216 0.8277879 0.1258776
hist(HTblooms$Freq)
HTmodel<-glmer(Freq ~ Dreissenids + (1|Lake_Name), family=poisson(link=log), data=HTblooms)
summary(HTmodel)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name)
## Data: HTblooms
##
## AIC BIC logLik deviance df.resid
## 55.7 60.1 -24.9 49.7 29
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.14857 -0.04613 -0.04613 -0.04613 0.22154
##
## Random effects:
## Groups Name Variance Std.Dev.
## Lake_Name (Intercept) 29.59 5.439
## Number of obs: 32, groups: Lake_Name, 31
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.160 3.703 -0.853 0.393
## DreissenidsUninvaded -2.929 3.086 -0.949 0.342
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.803
HTres<-simulateResiduals(HTmodel)
plot(HTres)
testResiduals(HTres)
## $uniformity
##
## One-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.18589, p-value = 0.1929
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## ratioObsSim = 0.00048173, p-value = 0.328
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test
##
## data: simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 32.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
## $uniformity
##
## One-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.18589, p-value = 0.1929
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## ratioObsSim = 0.00048173, p-value = 0.328
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test
##
## data: simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 32.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
testZeroInflation(HTres)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 1.0312, p-value = 0.952
## alternative hypothesis: two.sided
redHTBloom<-redCSLAP[which(redCSLAP$Bloom == "Bloom"), ]
#create df of only OW blooms, then use 10 ug/L microcystin to get HT blooms (DEC open water guideline)
redOWHTBloom<-redHTBloom[redHTBloom$Info_Type == "OW",]
redOWHTBloom$HTBloom <- ifelse(redOWHTBloom$ESF_Microcystin >= 10.0, "HT Bloom", "No HT Bloom")
#create df of only SB blooms, then use 20 ug/L microcystin to get HT blooms (DEC shoreline guideline)
redSBHTBloom<-redHTBloom[redHTBloom$Info_Type == "SB",]
redSBHTBloom$HTBloom <- ifelse(redSBHTBloom$ESF_Microcystin >= 20.0, "HT Bloom", "No HT Bloom")
#Separate based on invasion status and drop unused levels of Lake Name
redI.SBHTBloom<-redSBHTBloom[redSBHTBloom$Dreissenids == "Invaded",]
redI.SBHTBloom$Lake_Name<-droplevels(redI.SBHTBloom$Lake_Name)
redU.SBHTBloom<-redSBHTBloom[redSBHTBloom$Dreissenids == "Uninvaded",]
redU.SBHTBloom$Lake_Name<-droplevels(redU.SBHTBloom$Lake_Name)
#separate by invaded and uninvaded
redI.HTblooms<-as.data.frame(table(redI.SBHTBloom[,c("Lake_Name","HTBloom")]))
redI.HTblooms$Dreissenids<-rep("Invaded", 8)
redU.HTblooms<-as.data.frame(table(redU.SBHTBloom[,c("Lake_Name","HTBloom")]))
redU.HTblooms$Dreissenids<-rep("Uninvaded", 6)
#Make into one dataframe for analysis
redHTblooms<-rbind(redI.HTblooms, redU.HTblooms)
#Remove 'no blooms'
redHTblooms<-redHTblooms[redHTblooms$HTBloom == "HT Bloom",]
redHTfit<-aov(Freq ~ Dreissenids, data=redHTblooms)
summary(redHTfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## Dreissenids 1 26.67 26.67 1.751 0.222
## Residuals 8 121.83 15.23
TukeyHSD(redHTfit)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Freq ~ Dreissenids, data = redHTblooms)
##
## $Dreissenids
## diff lwr upr p adj
## Uninvaded-Invaded -3.333333 -9.142215 2.475548 0.2223148
hist(redHTblooms$Freq)
redHTmodel<-glmer(Freq ~ Dreissenids + (1|Lake_Name), family=poisson(link=log), data=redHTblooms)
summary(redHTmodel)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name)
## Data: redHTblooms
##
## AIC BIC logLik deviance df.resid
## 28.7 29.6 -11.4 22.7 7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.50742 -0.20937 -0.20937 0.02782 0.85377
##
## Random effects:
## Groups Name Variance Std.Dev.
## Lake_Name (Intercept) 3.781 1.944
## Number of obs: 10, groups: Lake_Name, 9
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3834 1.3842 -0.277 0.782
## DreissenidsUninvaded -2.5782 1.8428 -1.399 0.162
##
## Correlation of Fixed Effects:
## (Intr)
## DrssndsUnnv -0.464
redHTres<-simulateResiduals(redHTmodel)
plot(redHTres)
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.25. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.5. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.
## Warning in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots): basis dimension, k, increased to minimum possible
## Unable to calculate quantile regression for quantile 0.75. Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.
testResiduals(redHTres)
## $uniformity
##
## One-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.13156, p-value = 0.9859
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## ratioObsSim = 0.67525, p-value = 0.504
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test
##
## data: simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 10.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
## $uniformity
##
## One-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.13156, p-value = 0.9859
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## ratioObsSim = 0.67525, p-value = 0.504
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test
##
## data: simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 10.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
testZeroInflation(redHTres)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 1.0069, p-value = 1
## alternative hypothesis: two.sided
#Extracting average annual TP for each lake
library(plyr)
avgTP<-ddply(OWCSLAP, c("LakeID", "Sample_Year"), summarize,
Mean = mean(TP, na.rm=TRUE))
#Extracting average annual TN:TP for each lake
avgTNTP<-ddply(OWCSLAP, c("LakeID", "Sample_Year"), summarize,
Mean = mean(TN_TP, na.rm=TRUE))
#Taking only rows where there is a value for Microcystin
Bloom<-CSLAP[!is.na(CSLAP$ESF_Microcystin),]
#Which rows are missing TP values?
noTPbloom<-Bloom[is.na(Bloom$TP),]
#Which rows are missing TN:TP values?
noTNTPbloom<-Bloom[is.na(Bloom$TN_TP),]
#merge average TP to noTPbloom data frame
avgTPbloom<-merge(noTPbloom, avgTP, by=c("LakeID", "Sample_Year"), all.x=TRUE)
#merge average TN:TP to noTNTPbloom data frame
avgTNTPbloom<-merge(noTNTPbloom, avgTNTP, by=c("LakeID", "Sample_Year"), all.x = TRUE)
#remove original TP column, and re-name the average TP column
avgTPbloom<-avgTPbloom[, c(1:16,18:56)]
colnames(avgTPbloom)[colnames(avgTPbloom)=="Mean"] <- "TP"
#remove original TN:TP column, and re-name the average TN:TP column
avgTNTPbloom<-avgTNTPbloom[, c(1:49, 51:56)]
colnames(avgTNTPbloom)[colnames(avgTNTPbloom)=="Mean"] <- "TN_TP"
#re-order columns
avgTPbloom<-avgTPbloom[, c(1:16,55,17:54)]
#re-order columns
avgTNTPbloom<-avgTNTPbloom[, c(1:49,55,50:54)]
#remove TN_TP from avgTPbloom and merge with avgTNTP bloom
avgTPbloom<-avgTPbloom[,c(1:49,51:55)]
TNTPmerge<-avgTNTPbloom[,c(5,50)]
avgTPTNTPbloom<-merge(avgTPbloom, TNTPmerge, by="Sample_Name")
#re-order columns
avgTPTNTPbloom<-avgTPTNTPbloom[,c(1:49,55,50:54)]
#combine two data frames
hadTP<-Bloom[!is.na(Bloom$TP),]
completeBloom<-rbind(hadTP, avgTPTNTPbloom)
#Split by Info_Type
OWcompleteBloom<-completeBloom[completeBloom$Info_Type == "OW",]
SBcompleteBloom<-completeBloom[completeBloom$Info_Type == "SB",]
#split by invasion status
OWinvadedBlooms<-OWcompleteBloom[OWcompleteBloom$Dreissenids == "Invaded",]
OWuninvadedBlooms<-OWcompleteBloom[OWcompleteBloom$Dreissenids == "Uninvaded",]
SBinvadedBlooms<-SBcompleteBloom[SBcompleteBloom$Dreissenids == "Invaded",]
SBuninvadedBlooms<-SBcompleteBloom[SBcompleteBloom$Dreissenids == "Uninvaded",]
#create linear models
OWinvaded.lm<-lm(log(OWinvadedBlooms$ESF_Microcystin+.1)~log(OWinvadedBlooms$TP+.1))
OWuninvaded.lm<-lm(log(OWuninvadedBlooms$ESF_Microcystin+.1)~log(OWuninvadedBlooms$TP+.1))
summary(OWinvaded.lm)
##
## Call:
## lm(formula = log(OWinvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWinvadedBlooms$TP +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48252 -0.17973 0.04444 0.11749 0.78628
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.6021 2.0963 4.103 0.00175 **
## log(OWinvadedBlooms$TP + 0.1) 4.2365 0.9805 4.321 0.00121 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3225 on 11 degrees of freedom
## Multiple R-squared: 0.6293, Adjusted R-squared: 0.5956
## F-statistic: 18.67 on 1 and 11 DF, p-value: 0.001213
summary(OWuninvaded.lm)
##
## Call:
## lm(formula = log(OWuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWuninvadedBlooms$TP +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8498 -0.3228 -0.1828 0.0744 4.3311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.072 3.407 -1.195 0.235
## log(OWuninvadedBlooms$TP + 0.1) -1.642 1.557 -1.054 0.295
##
## Residual standard error: 0.6902 on 87 degrees of freedom
## Multiple R-squared: 0.01261, Adjusted R-squared: 0.001258
## F-statistic: 1.111 on 1 and 87 DF, p-value: 0.2948
OWinvaded.lm.sum<-summary(OWinvaded.lm)
OWuninvaded.lm.sum<-summary(OWuninvaded.lm)
ggplot(OWcompleteBloom, aes(x=TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
geom_point(cex=2) +
geom_smooth(data=subset(OWcompleteBloom, Dreissenids == "Invaded" ),
aes(x=TP, y=ESF_Microcystin, color=Dreissenids), method=lm, se=FALSE)+
theme_classic()+
scale_x_continuous(trans='log10') +
scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
scale_color_grey()
## `geom_smooth()` using formula 'y ~ x'
ggsave("./output_figures/Fig 2-1 Global OW.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
#create linear models
SBinvaded.lm<-lm(log(SBinvadedBlooms$ESF_Microcystin+.1)~log(SBinvadedBlooms$TP+.1))
SBuninvaded.lm<-lm(log(SBuninvadedBlooms$ESF_Microcystin+.1)~log(SBuninvadedBlooms$TP+.1))
summary(SBinvaded.lm)
##
## Call:
## lm(formula = log(SBinvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBinvadedBlooms$TP +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2455 -0.7569 0.0850 0.8098 2.3340
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.834 16.000 1.240 0.232
## log(SBinvadedBlooms$TP + 0.1) 7.589 7.386 1.027 0.319
##
## Residual standard error: 1.431 on 17 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.05847, Adjusted R-squared: 0.003082
## F-statistic: 1.056 on 1 and 17 DF, p-value: 0.3186
summary(SBuninvaded.lm)
##
## Call:
## lm(formula = log(SBuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBuninvadedBlooms$TP +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1845 -1.8375 -0.3783 1.8765 5.8238
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.79 35.71 0.974 0.340
## log(SBuninvadedBlooms$TP + 0.1) 13.95 16.52 0.845 0.407
##
## Residual standard error: 2.653 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.0314, Adjusted R-squared: -0.01262
## F-statistic: 0.7133 on 1 and 22 DF, p-value: 0.4074
SBinvaded.lm.sum<-summary(SBinvaded.lm)
SBuninvaded.lm.sum<-summary(SBuninvaded.lm)
ggplot(SBcompleteBloom, aes(x=TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
geom_point(cex=2) +
theme_classic()+
scale_x_continuous(trans='log10') +
scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
scale_color_grey()
## Warning: Removed 3 rows containing missing values (geom_point).
ggsave("./output_figures/Fig 2-1 Global SB.png")
## Saving 7 x 5 in image
## Warning: Removed 3 rows containing missing values (geom_point).
#create linear models
OWTNTP_invaded.lm<-lm(log(OWinvadedBlooms$ESF_Microcystin+.1)~log(OWinvadedBlooms$TN_TP+.1))
OWTNTP_uninvaded.lm<-lm(log(OWuninvadedBlooms$ESF_Microcystin+.1)~log(OWuninvadedBlooms$TN_TP+.1))
summary(OWTNTP_invaded.lm)
##
## Call:
## lm(formula = log(OWinvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWinvadedBlooms$TN_TP +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37473 -0.23835 -0.22292 -0.06378 1.11484
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5400 0.7164 0.754 0.467
## log(OWinvadedBlooms$TN_TP + 0.1) -0.2916 0.2077 -1.404 0.188
##
## Residual standard error: 0.4878 on 11 degrees of freedom
## Multiple R-squared: 0.152, Adjusted R-squared: 0.07489
## F-statistic: 1.971 on 1 and 11 DF, p-value: 0.1879
summary(OWTNTP_uninvaded.lm)
##
## Call:
## lm(formula = log(OWuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWuninvadedBlooms$TN_TP +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8382 -0.2960 -0.2138 0.0185 4.3962
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8216 0.5559 -1.478 0.143
## log(OWuninvadedBlooms$TN_TP + 0.1) 0.1014 0.1626 0.624 0.534
##
## Residual standard error: 0.6962 on 86 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.004504, Adjusted R-squared: -0.007072
## F-statistic: 0.3891 on 1 and 86 DF, p-value: 0.5344
OWTNTP_invaded.lm.sum<-summary(OWTNTP_invaded.lm)
OWTNTP_uninvaded.lm.sum<-summary(OWTNTP_uninvaded.lm)
ggplot(OWcompleteBloom, aes(x=TN_TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
geom_point(cex=2) +
theme_classic()+
scale_x_continuous(trans='log10') +
scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
scale_color_grey()
## Warning: Removed 1 rows containing missing values (geom_point).
ggsave("./output_figures/Fig 2-3 Global OW.png")
## Saving 7 x 5 in image
## Warning: Removed 1 rows containing missing values (geom_point).
#create linear models
SBTNTP_invaded.lm<-lm(log(SBinvadedBlooms$ESF_Microcystin+.1)~log(SBinvadedBlooms$TN_TP+.1))
SBTNTP_uninvaded.lm<-lm(log(SBuninvadedBlooms$ESF_Microcystin+.1)~log(SBuninvadedBlooms$TN_TP+.1))
summary(SBTNTP_invaded.lm)
##
## Call:
## lm(formula = log(SBinvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBinvadedBlooms$TN_TP +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5667 -0.6502 0.1916 0.7317 2.4407
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.8364 2.3907 2.441 0.0259 *
## log(SBinvadedBlooms$TN_TP + 0.1) -0.5496 0.5338 -1.030 0.3176
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.431 on 17 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.0587, Adjusted R-squared: 0.003334
## F-statistic: 1.06 on 1 and 17 DF, p-value: 0.3176
summary(SBTNTP_uninvaded.lm)
##
## Call:
## lm(formula = log(SBuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBuninvadedBlooms$TN_TP +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7351 -1.2655 -0.2051 1.7642 5.8474
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.229 8.627 1.649 0.113
## log(SBuninvadedBlooms$TN_TP + 0.1) -2.817 2.529 -1.114 0.277
##
## Residual standard error: 2.622 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.05339, Adjusted R-squared: 0.01036
## F-statistic: 1.241 on 1 and 22 DF, p-value: 0.2774
SBTNTP_invaded.lm.sum<-summary(SBTNTP_invaded.lm)
SBTNTP_uninvaded.lm.sum<-summary(SBTNTP_uninvaded.lm)
ggplot(SBcompleteBloom, aes(x=TN_TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
geom_point(cex=2) +
theme_classic()+
scale_x_continuous(trans='log10') +
scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
scale_color_grey()
## Warning: Removed 3 rows containing missing values (geom_point).
ggsave("./output_figures/Fig 2-3 Global SB.png")
## Saving 7 x 5 in image
## Warning: Removed 3 rows containing missing values (geom_point).
#create linear models
OWchl.invaded.lm<-lm(log(OWinvadedBlooms$ESF_Microcystin+.1)~log(OWinvadedBlooms$ESF_FP_Chl.a+.1))
OWchl.uninvaded.lm<-lm(log(OWuninvadedBlooms$ESF_Microcystin+.1)~log(OWuninvadedBlooms$ESF_FP_Chl.a+.1))
summary(OWchl.invaded.lm)
##
## Call:
## lm(formula = log(OWinvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWinvadedBlooms$ESF_FP_Chl.a +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44372 -0.13271 -0.04248 0.02934 1.07496
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.6270 0.1375 -4.560 0.00104 **
## log(OWinvadedBlooms$ESF_FP_Chl.a + 0.1) 0.1602 0.1426 1.124 0.28740
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3942 on 10 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1121, Adjusted R-squared: 0.02332
## F-statistic: 1.263 on 1 and 10 DF, p-value: 0.2874
summary(OWchl.uninvaded.lm)
##
## Call:
## lm(formula = log(OWuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWuninvadedBlooms$ESF_FP_Chl.a +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.80127 -0.23851 -0.15538 -0.00707 2.03637
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.50704 0.06405 -7.916 1.37e-11
## log(OWuninvadedBlooms$ESF_FP_Chl.a + 0.1) -0.03765 0.04618 -0.815 0.417
##
## (Intercept) ***
## log(OWuninvadedBlooms$ESF_FP_Chl.a + 0.1)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.501 on 78 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.008452, Adjusted R-squared: -0.00426
## F-statistic: 0.6649 on 1 and 78 DF, p-value: 0.4173
OWchl.invaded.lm.sum<-summary(OWchl.invaded.lm)
OWchl.uninvaded.lm.sum<-summary(OWchl.uninvaded.lm)
ggplot(OWcompleteBloom, aes(x=ESF_FP_Chl.a, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
geom_point(cex=2) +
geom_smooth(method=lm, se=FALSE)+
theme_classic()+
scale_x_continuous(trans='log10') +
scale_y_continuous(trans='log10') +
scale_color_grey()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
ggsave("./output_figures/Fig 2-2 Global OW.png")
## Saving 7 x 5 in image
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 11 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
#create linear models
SBchl.invaded.lm<-lm(log(SBinvadedBlooms$ESF_Microcystin+.1)~log(SBinvadedBlooms$ESF_FP_Chl.a+.1))
SBchl.uninvaded.lm<-lm(log(SBuninvadedBlooms$ESF_Microcystin+.1)~log(SBuninvadedBlooms$ESF_FP_Chl.a+.1))
summary(SBchl.invaded.lm)
##
## Call:
## lm(formula = log(SBinvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBinvadedBlooms$ESF_FP_Chl.a +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2713 -0.9313 0.2731 1.0336 2.3892
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.7787 1.4482 1.919 0.071 .
## log(SBinvadedBlooms$ESF_FP_Chl.a + 0.1) 0.1318 0.2493 0.529 0.603
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.531 on 18 degrees of freedom
## Multiple R-squared: 0.01529, Adjusted R-squared: -0.03941
## F-statistic: 0.2795 on 1 and 18 DF, p-value: 0.6035
summary(SBchl.uninvaded.lm)
##
## Call:
## lm(formula = log(SBuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBuninvadedBlooms$ESF_FP_Chl.a +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.2030 -1.9212 -0.1592 1.7338 5.5123
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1730 2.4462 0.480 0.636
## log(SBuninvadedBlooms$ESF_FP_Chl.a + 0.1) 0.4346 0.2928 1.484 0.151
##
## Residual standard error: 2.498 on 24 degrees of freedom
## Multiple R-squared: 0.08409, Adjusted R-squared: 0.04593
## F-statistic: 2.204 on 1 and 24 DF, p-value: 0.1507
SBchl.invaded.lm.sum<-summary(SBchl.invaded.lm)
SBchl.uninvaded.lm.sum<-summary(SBchl.uninvaded.lm)
ggplot(SBcompleteBloom, aes(x=ESF_FP_Chl.a, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
geom_point(cex=2) +
geom_smooth(method=lm, se=FALSE)+
theme_classic()+
scale_x_continuous(trans='log10') +
scale_y_continuous(trans='log10') +
scale_color_grey()
## `geom_smooth()` using formula 'y ~ x'
ggsave("./output_figures/Fig 2-2 Global SB.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
#Extracting average annual TP for each lake
library(plyr)
redavgTP<-ddply(redOWCSLAP, c("LakeID", "Sample_Year"), summarize,
Mean = mean(TP, na.rm=TRUE))
#Extracting average annual TNTP for each lake
redavgTNTP<-ddply(redOWCSLAP, c("LakeID", "Sample_Year"), summarize,
Mean = mean(TN_TP, na.rm=TRUE))
#Taking only rows where there is a value for microcystin
redBloom<-redCSLAP[!is.na(redCSLAP$ESF_Microcystin),]
#Which rows are missing TP values?
rednoTPbloom<-redBloom[is.na(redBloom$TP),]
#Which rows are missing TNTP values?
rednoTNTPbloom<-redBloom[is.na(redBloom$TN_TP),]
#merge average TP to noTPbloom data frame
redavgTPbloom<-merge(rednoTPbloom, redavgTP, by=c("LakeID", "Sample_Year"), all.x=TRUE, all.y=FALSE)
#merge average TN:TP to noTPbloom data frame
redavgTNTPbloom<-merge(rednoTNTPbloom, redavgTNTP, by=c("LakeID", "Sample_Year"), all.x=TRUE, all.y=FALSE)
#remove original TP column, and re-name the average TP column
redavgTPbloom<-redavgTPbloom[, c(1:16, 18:56)]
colnames(redavgTPbloom)[colnames(redavgTPbloom)=="Mean"] <- "TP"
#remove original TNTP column, and re-name the average TNTP column
redavgTNTPbloom<-redavgTNTPbloom[, c(1:49, 51:56)]
colnames(redavgTNTPbloom)[colnames(redavgTNTPbloom)=="Mean"] <- "TN_TP"
#re-order columns
redavgTPbloom<-redavgTPbloom[, c(1:16,55,17:54)]
#re-order columns
redavgTNTPbloom<-redavgTNTPbloom[, c(1:49,55,50:54)]
#combine two data frames
redhadTP<-redBloom[!is.na(redBloom$TP),]
redcompleteBloom<-rbind(redhadTP, redavgTPbloom)
redcompleteBloom<-rbind(redcompleteBloom, redavgTNTPbloom)
#split by invasion status
redinvadedBlooms<-redcompleteBloom[redcompleteBloom$Dreissenids == "Invaded",]
reduninvadedBlooms<-redcompleteBloom[redcompleteBloom$Dreissenids == "Uninvaded",]
#create linear models
red.invaded.lm<-lm(log(redinvadedBlooms$ESF_Microcystin+.1)~log(redinvadedBlooms$TP+.1))
red.uninvaded.lm<-lm(log(reduninvadedBlooms$ESF_Microcystin+.1)~log(reduninvadedBlooms$TP+.1))
summary(red.invaded.lm)
##
## Call:
## lm(formula = log(redinvadedBlooms$ESF_Microcystin + 0.1) ~ log(redinvadedBlooms$TP +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.702 -2.371 0.361 2.076 3.634
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.06498 12.50675 -0.005 0.996
## log(redinvadedBlooms$TP + 0.1) -0.88251 5.80406 -0.152 0.880
##
## Residual standard error: 2.266 on 30 degrees of freedom
## (21 observations deleted due to missingness)
## Multiple R-squared: 0.0007701, Adjusted R-squared: -0.03254
## F-statistic: 0.02312 on 1 and 30 DF, p-value: 0.8802
summary(red.uninvaded.lm)
##
## Call:
## lm(formula = log(reduninvadedBlooms$ESF_Microcystin + 0.1) ~
## log(reduninvadedBlooms$TP + 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7856 -0.7785 -0.1978 0.2512 5.1135
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -33.66 28.00 -1.202 0.252
## log(reduninvadedBlooms$TP + 0.1) -15.39 12.85 -1.197 0.254
##
## Residual standard error: 1.681 on 12 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1067, Adjusted R-squared: 0.03228
## F-statistic: 1.434 on 1 and 12 DF, p-value: 0.2543
red.invaded.lm.sum<-summary(red.invaded.lm)
red.uninvaded.lm.sum<-summary(red.uninvaded.lm)
ggplot(redcompleteBloom, aes(x=TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
geom_point(cex=2) +
geom_smooth(data=subset(redcompleteBloom, Dreissenids == "Uninvaded" ),
aes(x=TP, y=ESF_Microcystin, color=Dreissenids), method=lm, se=FALSE)+
theme_classic()+
scale_x_continuous(trans='log10') +
scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
scale_color_grey()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
ggsave("./output_data/Fig 2-1 Reduced.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
#create linear models
redTNTP_invaded.lm<-lm(log(redinvadedBlooms$ESF_Microcystin+.1)~log(redinvadedBlooms$TN_TP+.1))
redTNTP_uninvaded.lm<-lm(log(reduninvadedBlooms$ESF_Microcystin+.1)~log(reduninvadedBlooms$TN_TP+.1))
summary(redTNTP_invaded.lm)
##
## Call:
## lm(formula = log(redinvadedBlooms$ESF_Microcystin + 0.1) ~ log(redinvadedBlooms$TN_TP +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4117 -1.4734 -0.5156 1.9925 4.0266
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.0059 1.8018 -1.668 0.1057
## log(redinvadedBlooms$TN_TP + 0.1) 1.2073 0.4403 2.742 0.0102 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.027 on 30 degrees of freedom
## (21 observations deleted due to missingness)
## Multiple R-squared: 0.2004, Adjusted R-squared: 0.1737
## F-statistic: 7.518 on 1 and 30 DF, p-value: 0.0102
summary(redTNTP_uninvaded.lm)
##
## Call:
## lm(formula = log(reduninvadedBlooms$ESF_Microcystin + 0.1) ~
## log(reduninvadedBlooms$TN_TP + 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7063 -0.7244 -0.3029 0.0851 5.2936
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.073 4.405 -0.925 0.373
## log(reduninvadedBlooms$TN_TP + 0.1) 1.184 1.321 0.896 0.388
##
## Residual standard error: 1.722 on 12 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.06275, Adjusted R-squared: -0.01535
## F-statistic: 0.8034 on 1 and 12 DF, p-value: 0.3877
redTNTP_invaded.lm.sum<-summary(redTNTP_invaded.lm)
redTNTP_uninvaded.lm.sum<-summary(redTNTP_uninvaded.lm)
ggplot(redcompleteBloom, aes(x=TN_TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
geom_point(cex=2) +
geom_smooth(data=subset(redcompleteBloom, Dreissenids == "Invaded" ),
aes(x=TN_TP, y=ESF_Microcystin, color=Dreissenids), method=lm, se=FALSE)+
theme_classic()+
scale_x_continuous(trans='log10') +
scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
scale_color_grey()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
ggsave("./output_figures/Fig 2-3 Reduced.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
#create linear models
red.chl.invaded.lm<-lm(log(redinvadedBlooms$ESF_Microcystin+.1)~log(redinvadedBlooms$ESF_FP_Chl.a+.1))
red.chl.uninvaded.lm<-lm(log(reduninvadedBlooms$ESF_Microcystin+.1)~log(reduninvadedBlooms$ESF_FP_Chl+.1))
summary(red.chl.invaded.lm)
##
## Call:
## lm(formula = log(redinvadedBlooms$ESF_Microcystin + 0.1) ~ log(redinvadedBlooms$ESF_FP_Chl.a +
## 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7338 -0.6808 0.1572 0.8108 2.6772
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.21172 0.42764 -0.495 0.623
## log(redinvadedBlooms$ESF_FP_Chl.a + 0.1) 0.62604 0.08358 7.491 1.03e-09
##
## (Intercept)
## log(redinvadedBlooms$ESF_FP_Chl.a + 0.1) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.504 on 50 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5288, Adjusted R-squared: 0.5194
## F-statistic: 56.11 on 1 and 50 DF, p-value: 1.028e-09
summary(red.chl.uninvaded.lm)
##
## Call:
## lm(formula = log(reduninvadedBlooms$ESF_Microcystin + 0.1) ~
## log(reduninvadedBlooms$ESF_FP_Chl + 0.1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6438 -0.7245 0.1733 0.7195 1.3421
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.78141 0.28565 -2.736 0.0181
## log(reduninvadedBlooms$ESF_FP_Chl + 0.1) 0.73772 0.09183 8.034 3.6e-06
##
## (Intercept) *
## log(reduninvadedBlooms$ESF_FP_Chl + 0.1) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9466 on 12 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.8432, Adjusted R-squared: 0.8301
## F-statistic: 64.54 on 1 and 12 DF, p-value: 3.602e-06
red.chl.invaded.lm.sum<-summary(red.chl.invaded.lm)
red.chl.uninvaded.lm.sum<-summary(red.chl.uninvaded.lm)
ggplot(redcompleteBloom, aes(x=ESF_FP_Chl.a, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
geom_point(cex=2) +
geom_smooth(method=lm, se=FALSE)+
theme_classic()+
scale_x_continuous(trans='log10') +
scale_y_continuous(trans='log10') +
scale_color_grey()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
ggsave("./output_figures/Fig 2-2 Reduced.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).